diff --git a/parser/image.go b/parser/image.go deleted file mode 100644 index a25bddd..0000000 --- a/parser/image.go +++ /dev/null @@ -1,37 +0,0 @@ -package parser - -func getDirectImageURL(response mediaObject) string { - return response.graphql.shortcodeMedia.DisplayURL -} - -/* -// Not used at the moment - -// GetImageURL returns image URL from shortcode -func getImageURL(shortcode string, items chan<- DownloadItem) { - myLogger := log.WithField("module", "image") - var url = fmt.Sprintf(mediaURL, shortcode) - - var response mediaObject - - data, err := worker.GetPage(url) - if err != nil { - myLogger.Errorln("Error fetching page", err.Error()) - } - - // unmarshal the JSON to the interface - err = json.Unmarshal(data, &response) - if err != nil { - myLogger.Errorln("Error unmashaling JSON", err.Error()) - fmt.Println(string(data)) - } - - item := DownloadItem{ - URL: getDirectImageURL(response), - } - - items <- item - - myLogger.Debugf("Got image from shortcode %s", shortcode) -} -*/ diff --git a/parser/image_test.go b/parser/image_test.go deleted file mode 100644 index 7eff78b..0000000 --- a/parser/image_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package parser - -import "testing" - -func Test_getDirectImageURL(t *testing.T) { - type args struct { - response mediaObject - } - tests := []struct { - name string - args args - want string - }{ - {"Basic test", - args{response: mediaObject{graphql: graphql{shortcodeMedia{DisplayURL: "http://httpbin.org/"}}}}, - "http://httpbin.org/"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := getDirectImageURL(tt.args.response); got != tt.want { - t.Errorf("getDirectImageURL() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/parser/insta_json.go b/parser/insta_json.go deleted file mode 100644 index a251355..0000000 --- a/parser/insta_json.go +++ /dev/null @@ -1,135 +0,0 @@ -// Collection of common elements from Instagram JSON API - -package parser - -import "time" - -// DownloadItem contains all data needed to download a file -type DownloadItem struct { - URL string - UserID string // Username - ID string // Numeric ID of user - Shortcode string // Shortcode for the media - Created time.Time -} - -// User defines the user who has posted stuff -type user struct { - Biography string `json:"biography"` - BlockedByViewer bool `json:"blocked_by_viewer"` - ConnectedFbPage interface{} `json:"connected_fb_page"` - CountryBlock bool `json:"country_block"` - ExternalURL interface{} `json:"external_url"` - ExternalURLLinkshimmed interface{} `json:"external_url_linkshimmed"` - FollowedByViewer bool `json:"followed_by_viewer"` - FollowsViewer bool `json:"follows_viewer"` - FullName string `json:"full_name"` - HasBlockedViewer bool `json:"has_blocked_viewer"` - HasRequestedViewer bool `json:"has_requested_viewer"` - ID string `json:"id"` - IsPrivate bool `json:"is_private"` - IsVerified bool `json:"is_verified"` - media `json:"media"` - ProfilePicURL string `json:"profile_pic_url"` - ProfilePicURLHd string `json:"profile_pic_url_hd"` - RequestedByViewer bool `json:"requested_by_viewer"` - Username string `json:"username"` -} - -// MediaObject defines the root element of shortcode replies -type mediaObject struct { - graphql `json:"graphql"` -} - -// Graphql response element, directly under MediaObject -type graphql struct { - shortcodeMedia `json:"shortcode_media"` -} - -// ShortcodeMedia - all media retrieved via direct shortcode link -type shortcodeMedia struct { - Typename string `json:"__typename"` - CaptionIsEdited bool `json:"caption_is_edited"` - CommentsDisabled bool `json:"comments_disabled"` - DisplayURL string `json:"display_url"` - GatingInfo interface{} `json:"gating_info"` - ID string `json:"id"` - IsAd bool `json:"is_ad"` - IsVideo bool `json:"is_video"` - Location interface{} `json:"location"` - MediaPreview string `json:"media_preview"` - Shortcode string `json:"shortcode"` - ShouldLogClientEvent bool `json:"should_log_client_event"` - TakenAtTimestamp int `json:"taken_at_timestamp"` - TrackingToken string `json:"tracking_token"` - VideoURL string `json:"video_url"` - VideoViewCount int `json:"video_view_count"` - ViewerHasLiked bool `json:"viewer_has_liked"` - ViewerHasSaved bool `json:"viewer_has_saved"` - ViewerHasSavedToCollection bool `json:"viewer_has_saved_to_collection"` - DisplayResourcess []displayResources `json:"display_resources"` - edgeSidecarToChildren `json:"edge_sidecar_to_children"` -} - -// PageInfo tells us if there is a new page after this one -type pageInfo struct { - EndCursor string `json:"end_cursor"` - HasNextPage bool `json:"has_next_page"` -} - -type edges struct { - node `json:"node"` -} - -type displayResources struct { - ConfigHeight int `json:"config_height"` - ConfigWidth int `json:"config_width"` - Src string `json:"src"` -} - -type edgeSidecarToChildren struct { - Edgess []edges `json:"edges"` -} - -type media struct { - Count int `json:"count"` - Nodess []nodes `json:"nodes"` - pageInfo `json:"page_info"` -} - -type nodes struct { - Typename string `json:"__typename"` - Caption string `json:"caption"` - Code string `json:"code"` - CommentsDisabled bool `json:"comments_disabled"` - Date int `json:"date"` - DisplaySrc string `json:"display_src"` - GatingInfo interface{} `json:"gating_info"` - ID string `json:"id"` - IsVideo bool `json:"is_video"` - MediaPreview string `json:"media_preview"` - VideoViews int `json:"video_views"` -} - -type node struct { - Typename string `json:"__typename"` - CommentsDisabled bool `json:"comments_disabled"` - DisplayURL string `json:"display_url"` - // edgeMediaToCaption `json:"edge_media_to_caption"` - ID string `json:"id"` - IsVideo bool `json:"is_video"` - Shortcode string `json:"shortcode"` - TakenAtTimestamp int `json:"taken_at_timestamp"` - ThumbnailSrc string `json:"thumbnail_src"` -} - -/* -type edgeMediaToCaption struct { - Edgess []edges `json:"edges"` -} -*/ -type edgeOwnerToTimelineMedia struct { - Count int `json:"count"` - Edgess []edges `json:"edges"` - pageInfo `json:"page_info"` -} diff --git a/parser/page.go b/parser/page.go deleted file mode 100644 index 7b920c3..0000000 --- a/parser/page.go +++ /dev/null @@ -1,111 +0,0 @@ -package parser - -import ( - "encoding/json" - "errors" - "fmt" - "sync" - "time" - - log "github.com/Sirupsen/logrus" - "github.com/lepinkainen/instafetch/worker" -) - -var ( - // ID and EndCursor - nextPageURL = "https://www.instagram.com/graphql/query/?query_id=17888483320059182&id=%s&first=100&after=%s" - //nextPageURL = "https://www.instagram.com/graphql/query/?query_id=17852405266163336&id=%s&first=100&after=%s" - - // QueryID: 17852405266163336 - // 17863787143139595 - // 17875800862117404 - // 17888483320059182 -) - -type nextpage struct { - data `json:"data"` - Status string `json:"status"` - Message string `json:"message"` -} - -type data struct { - nextPageUser `json:"user"` -} - -type nextPageUser struct { - edgeOwnerToTimelineMedia `json:"edge_owner_to_timeline_media"` -} - -/* -func getPageImageItem(edge edges) DownloadItem { - return DownloadItem{ - URL: edge.DisplayURL, - } -} -*/ - -// fetches all urls from a page and returns the cursor for the next page -func parseNextPage(baseItem DownloadItem, id string, endCursor string, items chan<- DownloadItem) (string, error) { - myLogger := log.WithField("module", "page") - - myLogger.Debug("-- Parsing next page") - - // generate url for the page - var url = fmt.Sprintf(nextPageURL, id, endCursor) - - // interface to hold the instagram JSON - var response nextpage - - data, err := worker.GetPage(url) - if err != nil { - myLogger.Errorln("Error fetching page", err.Error()) - } - - // unmarshal the JSON to the interface - err = json.Unmarshal(data, &response) - if err != nil { - myLogger.Errorln("Error unmashaling JSON", err.Error()) - fmt.Println(string(data)) - } - - if response.Status == "fail" { - return "", errors.New(response.Message) - } - - var wgSubWorkers sync.WaitGroup - - for _, image := range response.data.Edgess { - item := DownloadItem(baseItem) - item.Shortcode = image.Shortcode - - switch shortcode := image.Typename; shortcode { - case "GraphVideo": - wgSubWorkers.Add(1) - go func(item DownloadItem, items chan<- DownloadItem) { - defer wgSubWorkers.Done() - - getVideoURL(item, items) - }(item, items) - case "GraphSidecar": - wgSubWorkers.Add(1) - go func(item DownloadItem, items chan<- DownloadItem) { - defer wgSubWorkers.Done() - - getSidecarURLs(item, items) - }(item, items) - case "GraphImage": - item.Created = time.Unix(int64(image.node.TakenAtTimestamp), 0) - item.URL = image.DisplayURL - items <- item - default: - myLogger.Errorf("Unknown media type: '%v'", image.Typename) - } - } - wgSubWorkers.Wait() - - // return info about next page for looping through all pages - if response.HasNextPage { - return response.EndCursor, nil - } - return "", nil -} diff --git a/parser/stream.go b/parser/stream.go deleted file mode 100644 index 780abf0..0000000 --- a/parser/stream.go +++ /dev/null @@ -1,167 +0,0 @@ -package parser - -import ( - "sync" - "time" - - log "github.com/Sirupsen/logrus" - - "encoding/json" - "fmt" - - "github.com/lepinkainen/instafetch/worker" -) - -var ( - instagramStreamURL = "https://www.instagram.com/%s/?__a=1" // completed with username -) - -// API Structs autogenerated with https://github.com/mohae/json2go/tree/master/cmd/json2go - -// InstagramAPI is a reply to the main page query -type instagramAPI struct { - LoggingPageID string `json:"logging_page_id"` - user `json:"user"` -} - -// Settings defines the options for the downloaders -type Settings struct { - LatestOnly bool - Silent bool -} - -// getNextPageInfo returns the user ID and endcursor for the next page -// or empty strings if none exist -func getNextPageInfo(response instagramAPI) (string, string) { - - if !response.user.media.pageInfo.HasNextPage { - return "", "" - } - - var id = response.user.ID - var endCursor = response.user.media.pageInfo.EndCursor - - return id, endCursor -} - -// the first page is a bit different from the other pages -func getFirstPage(userName string) (instagramAPI, error) { - myLogger := log.WithField("module", "stream") - var url = fmt.Sprintf(instagramStreamURL, userName) - - // interface to hold the instagram JSON - var response instagramAPI - - data, err := worker.GetPage(url) - if err != nil { - myLogger.Errorln("Error fetching page: ", err.Error()) - return response, err - } - - myLogger.Debugf("Page for %s fetched", userName) - - // unmarshal the JSON to the interface - err = json.Unmarshal(data, &response) - if err != nil { - myLogger.Errorf("Error unmashaling JSON for user %s: %v", userName, err.Error()) - fmt.Println(string(data)) - return response, err - } - - myLogger.Debugf("Data for %s unmarshaled", userName) - - return response, nil -} - -// parse all data from the first page -func parseFirstPage(baseItem DownloadItem, res instagramAPI, items chan<- DownloadItem) { - myLogger := log.WithField("module", "stream") - - var wgSubWorkers sync.WaitGroup - - // get media urls according to type - for _, media := range res.user.media.Nodess { - item := DownloadItem(baseItem) - item.Shortcode = media.Code - - switch shortcode := media.Typename; shortcode { - case "GraphVideo": - wgSubWorkers.Add(1) - go func(item DownloadItem, items chan<- DownloadItem) { - defer wgSubWorkers.Done() - - getVideoURL(item, items) - }(item, items) - case "GraphSidecar": - wgSubWorkers.Add(1) - go func(item DownloadItem, items chan<- DownloadItem) { - defer wgSubWorkers.Done() - - getSidecarURLs(item, items) - }(item, items) - case "GraphImage": - item.Created = time.Unix(int64(media.Date), 0) - item.URL = media.DisplaySrc - items <- item - default: - myLogger.Errorf("Unknown media type: '%v'", media.Typename) - - } - } - - wgSubWorkers.Wait() -} - -// MediaURLs returns direct links to all media on an users stream -func MediaURLs(userName string, settings Settings, items chan<- DownloadItem) error { - myLogger := log.WithField("module", "stream").WithField("username", userName) - - if !settings.Silent { - myLogger.Infof("Parsing %s", userName) - } - - response, err := getFirstPage(userName) - if err != nil { - myLogger.Errorf("Error when parsing first page for %s", userName) - return err - } - - // Basic info for items to download - baseItem := DownloadItem{ - UserID: response.Username, - ID: response.user.ID, - } - - parseFirstPage(baseItem, response, items) - - if !settings.Silent { - myLogger.Infof("Parsed first page for %s", userName) - } - - if !settings.LatestOnly { - userID, endCursor := getNextPageInfo(response) - - page := 1 - - // only fetch a new page once every X seconds - throttle := time.NewTicker(time.Second * 2).C - - for endCursor != "" { - endCursor, err = parseNextPage(baseItem, userID, endCursor, items) - if err != nil { - if err.Error() == "rate limited" { - return err - } - myLogger.Errorf("Subpage parsing error: %v", err) - } - page = page + 1 - if !settings.Silent { - myLogger.Infof("Parsed page %d for %s", page, userName) - } - <-throttle - } - log.Infof("All %d pages done for %s", page, userName) - } - - return nil -} diff --git a/parser/stream_test.go b/parser/stream_test.go deleted file mode 100644 index 9e01c76..0000000 --- a/parser/stream_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package parser - -import "testing" - -func Test_getNextPageInfo(t *testing.T) { - type args struct { - response instagramAPI - } - tests := []struct { - name string - args args - want string - want1 string - }{ - {"Has next page", args{instagramAPI{user: user{ID: "12345", media: media{pageInfo: pageInfo{ - HasNextPage: true, - EndCursor: "thisistheend", - }}}}}, - "12345", - "thisistheend"}, - {"Last page", args{instagramAPI{user: user{ID: "12345", media: media{pageInfo: pageInfo{ - HasNextPage: false, - EndCursor: "thisistheend", - }}}}}, - "", - ""}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, got1 := getNextPageInfo(tt.args.response) - if got != tt.want { - t.Errorf("getNextPageInfo() got = %v, want %v", got, tt.want) - } - if got1 != tt.want1 { - t.Errorf("getNextPageInfo() got1 = %v, want %v", got1, tt.want1) - } - }) - } -} diff --git a/parser/video.go b/parser/video.go deleted file mode 100644 index 78f54eb..0000000 --- a/parser/video.go +++ /dev/null @@ -1,46 +0,0 @@ -package parser - -import ( - "encoding/json" - "fmt" - "time" - - log "github.com/Sirupsen/logrus" - "github.com/lepinkainen/instafetch/worker" -) - -var ( - mediaURL = "https://www.instagram.com/p/%s/?__a=1" // completed with shortcode -) - -func getDirectVideoURL(response mediaObject) string { - return response.graphql.shortcodeMedia.VideoURL -} - -// GetVideoURL parses a video page and returns the direct video URL -func getVideoURL(baseItem DownloadItem, items chan<- DownloadItem) { - myLogger := log.WithField("module", "video") - var url = fmt.Sprintf(mediaURL, baseItem.Shortcode) - - var response mediaObject - - data, err := worker.GetPage(url) - if err != nil { - myLogger.Errorln("Error fetching page", err.Error()) - } - - // unmarshal the JSON to the interface - err = json.Unmarshal(data, &response) - if err != nil { - myLogger.Errorln("Error unmashaling JSON", err.Error()) - fmt.Println(string(data)) - } - - item := DownloadItem(baseItem) - item.URL = getDirectVideoURL(response) - item.Created = time.Unix(int64(response.TakenAtTimestamp), 0) // save created as go Time - - items <- item - - myLogger.Debugf("Got video from shortcode %s", baseItem.Shortcode) -} diff --git a/parser/video_test.go b/parser/video_test.go deleted file mode 100644 index 17a39a6..0000000 --- a/parser/video_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package parser - -import "testing" - -func Test_getDirectVideoURL(t *testing.T) { - type args struct { - response mediaObject - } - tests := []struct { - name string - args args - want string - }{ - {"Basic test", - args{response: mediaObject{graphql: graphql{shortcodeMedia{VideoURL: "http://httpbin.org/"}}}}, - "http://httpbin.org/"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := getDirectVideoURL(tt.args.response); got != tt.want { - t.Errorf("getDirectVideoURL() = %v, want %v", got, tt.want) - } - }) - } -}