From acf3c646dbd7cee2449f99981184b8186efad847 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Sun, 23 Sep 2018 17:09:47 -0400 Subject: [PATCH 1/4] [InstagramUser] Add support for saved media --- youtube_dl/extractor/instagram.py | 55 ++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 7e0e838f0..a1fa2ddfe 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -8,6 +8,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, + compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( @@ -228,7 +229,7 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:saved/?)(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { @@ -347,10 +348,62 @@ class InstagramUserIE(InfoExtractor): if not cursor or not isinstance(cursor, compat_str): break + def _saved_entries(self, username): + cookies = self._get_cookies('https://www.instagram.com/%s/saved/' % username) + cursor = '' + has_next_page = True + + while has_next_page: + query = { + 'query_hash': '8c86fed24fa03a8a2eea2a70a80c7b6b', + 'variables': json.dumps({ + 'id': cookies.get('ds_user_id').value, + 'first': 12, + 'after': cursor, + }) + } + resp = self._download_json('https://www.instagram.com/graphql/query/', + username, + query=query) + media = resp['data']['user']['edge_saved_media'] + + for edge in media.get('edges', []): + node = edge.get('node') + if not node or not isinstance(node, dict): + continue + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + video_id = node.get('shortcode') + if not video_id: + continue + info = self.url_result( + 'https://instagram.com/p/%s/' % video_id, 
ie=InstagramIE.ie_key(), video_id=video_id) + + yield info + + page_info = media.get('page_info') + if not page_info or not isinstance(page_info, dict): + break + + has_next_page = page_info.get('has_next_page') + if not has_next_page: + break + + cursor = page_info.get('end_cursor') + if not cursor or not isinstance(cursor, compat_str): + break + def _real_extract(self, url): username = self._match_id(url) webpage = self._download_webpage(url, username) + last = filter(lambda x: len(x) > 0, + compat_urllib_parse_urlparse(url).path.split('/'))[-1] + if last == 'saved': + return self.playlist_result( + self._saved_entries(username), + username, '%s - Saved media' % username) data = self._parse_json( self._search_regex( From b4b9ab4a08de100f65b96db7f4fad7e932502b07 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 27 Sep 2018 04:50:28 -0400 Subject: [PATCH 2/4] [InstagramUser] Make use of try_get() --- youtube_dl/extractor/instagram.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index a1fa2ddfe..d11fdfc96 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -365,11 +365,12 @@ class InstagramUserIE(InfoExtractor): resp = self._download_json('https://www.instagram.com/graphql/query/', username, query=query) - media = resp['data']['user']['edge_saved_media'] + media = try_get(resp, + lambda x: x['data']['user']['edge_saved_media']) for edge in media.get('edges', []): - node = edge.get('node') - if not node or not isinstance(node, dict): + node = try_get(edge, lambda x: x['node'], dict) + if not node: continue if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: continue @@ -382,16 +383,16 @@ class InstagramUserIE(InfoExtractor): yield info - page_info = media.get('page_info') - if not page_info or not isinstance(page_info, dict): + page_info = try_get(media, lambda x: x['page_info'], dict) + if 
not page_info: break has_next_page = page_info.get('has_next_page') if not has_next_page: break - cursor = page_info.get('end_cursor') - if not cursor or not isinstance(cursor, compat_str): + cursor = try_get(page_info, lambda x: x['end_cursor'], compat_str) + if not cursor: break def _real_extract(self, url): From 1bd46156c0af689996995e119c867a7988ffcbd2 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Sun, 27 Jan 2019 21:49:48 -0500 Subject: [PATCH 3/4] [InstagramSaved] add test --- youtube_dl/extractor/instagram.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 25900e692..c33e8ccb2 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -360,6 +360,18 @@ class InstagramSavedIE(InstagramPlaylistIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/saved/?(?:$|[?#])' IE_DESC = 'Instagram saved media' IE_NAME = 'instagram:saved' + _TEST = { + 'url': 'https://www.instagram.com/tatsh2dx/saved/', + 'info_dict': { + 'id': 'tatsh2dx', + 'title': 'tatsh2dx', + }, + 'playlist_count': 5, + 'params': { + 'skip_download': True, + 'playlistend': 5, + } + } _QUERY_HASH = '8c86fed24fa03a8a2eea2a70a80c7b6b' From b159416b40305815f0aa88854a2fff5e4430c1e0 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Sun, 27 Jan 2019 22:07:09 -0500 Subject: [PATCH 4/4] [extractors] remove unrelated change --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 322205b62..4c93de5c8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1397,7 +1397,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE