From 9103bbc5cd11957de2e906e4401dcf4df9511d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 3 Nov 2013 12:11:13 +0100 Subject: [PATCH] Add the 'webpage_url' field to info_dict The url for the video page, it must allow to reproduce the result. It's automatically set by YoutubeDL if it's missing. --- test/test_download.py | 3 +++ youtube_dl/YoutubeDL.py | 13 ++++++++++--- youtube_dl/extractor/common.py | 3 +++ youtube_dl/extractor/vimeo.py | 13 ++++++------- youtube_dl/extractor/youtube.py | 3 ++- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index dfb04d010..d6cc9ec33 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -148,6 +148,9 @@ def generator(test_case): # Check for the presence of mandatory fields for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor']: + self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) finally: try_rm_tcs_files() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a3e0a700f..8938a2cd3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -354,8 +354,11 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } - if 'extractor' not in ie_result: - ie_result['extractor'] = ie.IE_NAME + self.add_extra_info(ie_result, + { + 'extractor': ie.IE_NAME, + 'webpage_url': url + }) return self.process_ie_result(ie_result, download, extra_info) except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) @@ -417,6 +420,7 @@ class YoutubeDL(object): 'playlist': playlist, 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], } entry_result = self.process_ie_result(entry, download=download, @@ -427,7 +431,10 @@ class YoutubeDL(object): elif result_type == 'compat_list': def _fixup(r): self.add_extra_info(r, - {'extractor': ie_result['extractor']}) + { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + }) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cef4dce85..e0ccba533 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -71,6 +71,9 @@ class InfoExtractor(object): ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + webpage_url: The url to the video webpage, if given to youtube-dl it + should allow to get the same result again. (It will be set + by YoutubeDL if it's missing) Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c7d864a2b..62273fd33 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ @@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') - if not mobj.group('proto'): - url = 'https://' + url - elif mobj.group('pro'): + if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - elif mobj.group('direct_link'): + else: url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information @@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor): if len(formats) == 0: raise ExtractorError(u'No known codec found') - return [{ + return { 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, @@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor): 'thumbnail': video_thumbnail, 'description': video_description, 'formats': formats, - }] + 'webpage_url': url, + } class VimeoChannelIE(InfoExtractor): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a19abe1f0..6ddd6ef06 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'subtitles': video_subtitles, 'duration': video_duration, 'age_limit': 18 if age_gate else 0, - 'annotations': video_annotations + 'annotations': video_annotations, + 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, }) return results