From b3d39be2393b3e5199d1abc3000d96ded3c4e4d7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 8 Aug 2019 23:23:48 +0100 Subject: [PATCH 01/16] [discovery] extract series meta field(#21808) --- youtube_dl/extractor/discovery.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 6287ca685..6a2712cc5 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -94,6 +94,8 @@ class DiscoveryIE(DiscoveryGoBaseIE): self._API_BASE_URL + 'content/videos', display_id, 'Downloading content JSON metadata', headers=headers, query={ + 'embed': 'show.name', + 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags', 'slug': display_id, 'show_slug': show_slug, })[0] From 64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 9 Aug 2019 08:16:53 +0100 Subject: [PATCH 02/16] [youtube] fix annotations extraction(closes #22045) --- youtube_dl/extractor/youtube.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44740cb75..0a0d2f41a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1595,17 +1595,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id - def _extract_annotations(self, video_id): - return self._download_webpage( - 'https://www.youtube.com/annotations_invideo', video_id, - note='Downloading annotations', - errnote='Unable to download video annotations', fatal=False, - query={ - 'features': 1, - 'legacy': 1, - 'video_id': video_id, - }) - @staticmethod def _extract_chapters(description, duration): if not description: @@ -2277,7 +2266,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # annotations video_annotations = None if self._downloader.params.get('writeannotations', False): - video_annotations = self._extract_annotations(video_id) + xsrf_token = self._search_regex( + r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P[A-Za-z0-9+/=]+)\2', + video_webpage, 'xsrf token', group='xsrf_token', fatal=False) + invideo_url = try_get( + player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str) + if xsrf_token and invideo_url: + xsrf_field_name = self._search_regex( + r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P\w+)\2', + video_webpage, 'xsrf field name', + group='xsrf_field_name', default='session_token') + video_annotations = self._download_webpage( + self._proto_relative_url(invideo_url), + video_id, note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + data=urlencode_postdata({xsrf_field_name: xsrf_token})) chapters = self._extract_chapters(description_original, video_duration) From ffddb112642ab394ecc11db055a064368eed42f7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 9 Aug 2019 08:19:41 +0100 Subject: [PATCH 03/16] [YoutubeDL] check annotations availabilty(closes #18582) --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3e832fec2..6a44bc7ba 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1783,6 +1783,8 @@ class YoutubeDL(object): annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') + elif not info_dict.get('annotations'): + self.report_warning('There are no annotations to write.') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) From 3bce4ff7d96d845ec67ffe8e9e2715474f190d89 Mon Sep 17 00:00:00 2001 From: lightmare Date: Sun, 11 Aug 2019 01:57:43 +0200 Subject: [PATCH 04/16] [downloader/fragment] Fix ETA calculation of resumed download (#21992) --- youtube_dl/downloader/fragment.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index f2e5733b6..02f35459e 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -190,12 +190,13 @@ class FragmentFD(FileDownloader): }) def _start_frag_download(self, ctx): + resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] # This dict stores the download progress, it's updated by the progress # hook state = { 'status': 'downloading', - 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], + 'downloaded_bytes': resume_len, 'fragment_index': ctx['fragment_index'], 'fragment_count': total_frags, 'filename': ctx['filename'], @@ -234,8 +235,8 @@ class FragmentFD(FileDownloader): state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] if not ctx['live']: state['eta'] = self.calc_eta( - start, time_now, estimated_size, - state['downloaded_bytes']) + start, time_now, estimated_size - resume_len, + state['downloaded_bytes'] - resume_len) state['speed'] = s.get('speed') or ctx.get('speed') ctx['speed'] = state['speed'] ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes From 351f37c022b24144c064fab39bd6d134e166c31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Aug 2019 05:02:52 +0700 Subject: [PATCH 05/16] [youtube:playlist] Improve flat extraction (closes #21927) --- youtube_dl/extractor/youtube.py | 49 +++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0a0d2f41a..b63f19bb0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( clean_html, dict_get, error_to_compat_str, + extract_attributes, ExtractorError, float_or_none, get_element_by_attribute, @@ -324,17 +325,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): for video_id, video_title in self.extract_videos_from_page(content): yield self.url_result(video_id, 'Youtube', video_id, video_title) - def extract_videos_from_page(self, page): - ids_in_page = [] - titles_in_page = [] - for mobj in re.finditer(self._VIDEO_RE, page): + def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page): + for mobj in re.finditer(video_re, page): # The link with index 0 is not the first video of the playlist (not sure if still actual) if 'index' in mobj.groupdict() and mobj.group('id') == '0': continue video_id = mobj.group('id') - video_title = unescapeHTML(mobj.group('title')) + video_title = unescapeHTML( + mobj.group('title')) if 'title' in mobj.groupdict() else None if video_title: video_title = video_title.strip() + if video_title == '► Play all': + video_title = None try: idx = ids_in_page.index(video_id) if video_title and not titles_in_page[idx]: @@ -342,6 +344,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): except ValueError: ids_in_page.append(video_id) titles_in_page.append(video_title) + + def extract_videos_from_page(self, page): + ids_in_page = [] + titles_in_page = [] + self.extract_videos_from_page_impl( + self._VIDEO_RE, page, ids_in_page, titles_in_page) return zip(ids_in_page, titles_in_page) @@ -2438,7 +2446,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (%(playlist_id)s) )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(?:&(?:[^"]*?index=(?P\d+))?(?:[^>]+>(?P[^<]+))?)?' + _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' + _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})' IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', @@ -2603,6 +2612,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): def _real_initialize(self): self._login() + def extract_videos_from_page(self, page): + ids_in_page = [] + titles_in_page = [] + + for item in re.findall( + r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page): + attrs = extract_attributes(item) + video_id = attrs['data-video-id'] + video_title = unescapeHTML(attrs.get('data-title')) + if video_title: + video_title = video_title.strip() + ids_in_page.append(video_id) + titles_in_page.append(video_title) + + # Fallback with old _VIDEO_RE + self.extract_videos_from_page_impl( + self._VIDEO_RE, page, ids_in_page, titles_in_page) + + # Relaxed fallbacks + self.extract_videos_from_page_impl( + r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page, + ids_in_page, titles_in_page) + self.extract_videos_from_page_impl( + r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page, + ids_in_page, titles_in_page) + + return zip(ids_in_page, titles_in_page) + def _extract_mix(self, playlist_id): # The mixes are generated from a single video # the id of the playlist is just 'RD' + video_id From 69611a1616774011278007b1429e0151481f8879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 13 Aug 2019 23:10:05 +0700 Subject: [PATCH 06/16] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7db147498..8c938ccd5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Core +* [downloader/fragment] Fix ETA calculation of resumed download (#21992) +* [YoutubeDL] Check annotations availability (#18582) + +Extractors +* [youtube:playlist] Improve flat extraction (#21927) +* [youtube] Fix annotations extraction (#22045) ++ [discovery] Extract series meta field (#21808) +* [youtube] Improve error detection (#16445) +* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986) ++ [roosterteeth] Add support for watch URLs +* [discovery] Limit video data by show slug (#21980) + + version 2019.08.02 Extractors From def849e0e61ec7ff0d59557e7950147557138a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 13 Aug 2019 23:18:38 +0700 Subject: [PATCH 07/16] release 2019.08.13 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +- .../ISSUE_TEMPLATE/2_site_support_request.md | 4 +- .../ISSUE_TEMPLATE/3_site_feature_request.md | 4 +- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 +- CONTRIBUTING.md | 66 +++++++++++++++++++ ChangeLog | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 80 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 4d3894ad3..4c75c8d5d 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.08.02** +- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.08.02 + [debug] youtube-dl version 2019.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 796e11e54..8e8c43c47 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.08.02** +- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index aa2348548..df719a29c 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.08.02** +- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 5b2501a65..3616db1a7 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.08.02** +- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.08.02 + [debug] youtube-dl version 2019.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index d1758a95c..0fa37aef1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.08.02** +- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd9ccbe96..ac759ddc4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -339,6 +339,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'<title>([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. diff --git a/ChangeLog b/ChangeLog index 8c938ccd5..9b9e2e149 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.08.13 Core * [downloader/fragment] Fix ETA calculation of resumed download (#21992) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0f7fdb23d..b53a08cae 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.08.02' +__version__ = '2019.08.13' From 0326bcb6c1a45067a804d2f32aa854a3fcd9b4ce Mon Sep 17 00:00:00 2001 From: Chuck Cho Date: Thu, 15 Aug 2019 15:14:47 -0700 Subject: [PATCH 08/16] [piksel] add subtitle capability (#20506) --- youtube_dl/extractor/piksel.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index c0c276a50..401298cb8 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -18,15 +18,14 @@ class PikselIE(InfoExtractor): _VALID_URL = r'https?://player\.piksel\.com/v/(?P[a-z0-9]+)' _TESTS = [ { - 'url': 'http://player.piksel.com/v/nv60p12f', - 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'url': 'http://player.piksel.com/v/ums2867l', + 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { - 'id': 'nv60p12f', + 'id': 'ums2867l', 'ext': 'mp4', - 'title': 'فن الحياة - الحلقة 1', - 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', - 'timestamp': 1465231790, - 'upload_date': '20160606', + 'title': 'GX-005 with Caption', + 'timestamp': 1481335659, + 'upload_date': '20161210' } }, { @@ -39,7 +38,7 @@ class PikselIE(InfoExtractor): 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.', 'timestamp': 1486171129, - 'upload_date': '20170204', + 'upload_date': '20170204' } } ] @@ -113,6 +112,13 @@ class PikselIE(InfoExtractor): }) self._sort_formats(formats) + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + if caption_url: + subtitles.setdefault(caption.get('locale', 'en'), []).append({ + 'url': caption_url}) + return { 'id': video_id, 'title': title, @@ -120,4 +126,5 @@ class PikselIE(InfoExtractor): 'thumbnail': video_data.get('thumbnailUrl'), 'timestamp': parse_iso8601(video_data.get('dateadd')), 'formats': formats, + 'subtitles': subtitles, } From 0add33abcb9eb3ac93f7af312940b033b4ae4168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 16 Aug 2019 23:36:23 +0700 Subject: [PATCH 09/16] [youtube] Improve unavailable message extraction (refs #22117) --- youtube_dl/extractor/youtube.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b63f19bb0..57d76a5a2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1809,10 +1809,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): break def extract_unavailable_message(): - return self._html_search_regex( - (r'(?s)]+id=["\']unavailable-submessage["\'][^>]+>(.+?)]+id=["\']unavailable-message["\'][^>]*>(.+?)'), - video_webpage, 'unavailable message', default=None) + messages = [] + for tag, kind in (('h1', 'message'), ('div', 'submessage')): + msg = self._html_search_regex( + r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)'.format(tag=tag, kind=kind), + video_webpage, 'unavailable %s' % kind, default=None) + if msg: + messages.append(msg) + if messages: + return '\n'.join(messages) if not video_info: unavailable_message = extract_unavailable_message() From 393cc31d5eba52018b3de4dd76361d79cb1b5f49 Mon Sep 17 00:00:00 2001 From: supritkumar Date: Tue, 20 Aug 2019 22:52:59 -0400 Subject: [PATCH 10/16] [einthusan] Add support for einthusan.ca (#22171) --- youtube_dl/extractor/einthusan.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 1fb00c9b0..4e0f8bc81 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -19,7 +19,7 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com))/movie/watch/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com|ca))/movie/watch/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', @@ -36,6 +36,9 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'https://einthusan.com/movie/watch/9097/', 'only_matching': True, + }, { + 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi', + 'only_matching': True, }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js From 183a18c4e7dad802404e932f3a7c33fad8db7891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Aug 2019 03:35:09 +0700 Subject: [PATCH 11/16] [usanetwork] Fix extraction (closes #22105) --- youtube_dl/extractor/usanetwork.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py index 823340776..54c7495cc 100644 --- a/youtube_dl/extractor/usanetwork.py +++ b/youtube_dl/extractor/usanetwork.py @@ -1,11 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .adobepass import AdobePassIE from ..utils import ( - extract_attributes, + NO_DEFAULT, smuggle_url, update_url_query, ) @@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_params = extract_attributes(self._search_regex( - r'(]+data-usa-tve-player-container[^>]*>)', webpage, 'player params')) - video_id = player_params['data-mpx-guid'] - title = player_params['data-episode-title'] + def _x(name, default=NO_DEFAULT): + return self._search_regex( + r'data-%s\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' % name, + webpage, name, default=default, group='value') - account_pid, path = re.search( - r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)', - webpage).groups() + video_id = _x('mpx-guid') + title = _x('episode-title') + mpx_account_id = _x('mpx-account-id', '2304992029') query = { 'mbr': 'true', } - if player_params.get('data-is-full-episode') == '1': + if _x('is-full-episode', None) == '1': query['manifest'] = 'm3u' - if player_params.get('data-entitlement') == 'auth': + if _x('is-entitlement', None) == '1': adobe_pass = {} drupal_settings = self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', @@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE): adobe_pass = drupal_settings.get('adobePass', {}) resource = self._get_mvpd_resource( adobe_pass.get('adobePassResourceId', 'usa'), - title, video_id, player_params.get('data-episode-rating', 'TV-14')) + title, video_id, _x('episode-rating', 'TV-14')) query['auth'] = self._extract_mvpd_auth( url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource) @@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE): info.update({ '_type': 'url_transparent', 'url': smuggle_url(update_url_query( - 'http://link.theplatform.com/s/%s/%s' % (account_pid, path), + 'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id), query), {'force_smil_url': True}), 'id': video_id, 'title': title, - 'series': player_params.get('data-show-title'), + 'series': _x('show-title', None), 'episode': title, 'ie_key': 'ThePlatform', }) From d1fcf255c5402d75a3f7b450bd1e795196d5817a Mon Sep 17 00:00:00 2001 From: phan-ctrl <54398886+phan-ctrl@users.noreply.github.com> Date: Tue, 27 Aug 2019 10:16:04 +0700 Subject: [PATCH 12/16] [safari] Fix authentication (closes #22161) (#22184) --- youtube_dl/extractor/safari.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 8d4806794..bd9ee1647 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -68,9 +68,10 @@ class SafariBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login: %s' % credentials, expected=True) - # oreilly serves two same groot_sessionid cookies in Set-Cookie header - # and expects first one to be actually set - self._apply_first_set_cookie_header(urlh, 'groot_sessionid') + # oreilly serves two same instances of the following cookies + # in Set-Cookie header and expects first one to be actually set + for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'): + self._apply_first_set_cookie_header(urlh, cookie) _, urlh = self._download_webpage_handle( auth.get('redirect_uri') or next_uri, None, 'Completing login',) From 494d664e679c5b0f85e3c899579e7eb8a1cc8246 Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Wed, 28 Aug 2019 01:39:59 +0700 Subject: [PATCH 13/16] [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223) --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 57d76a5a2..25d056b3c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -387,8 +387,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| + (?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| + (?:www\.)?yt\.elukerio\.org/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: From b72305f07892daa287bd51b70dd8eeeed627e7d6 Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 26 Aug 2019 23:16:18 +0800 Subject: [PATCH 14/16] [bbccouk] Extend _VALID_URL (closes #19200) --- youtube_dl/extractor/bbc.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e76507951..3f820eed2 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor): iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| radio/player/| + sounds/play/| events/[^/]+/play/[^/]+/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) @@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, + }, { + 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb', + 'note': 'Audio', + 'info_dict': { + 'id': 'm0007jz9', + 'ext': 'mp4', + 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra', + 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.", + 'duration': 9840, + }, + 'params': { + # rtmp download + 'skip_download': True, + } }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, From acc86c9a978a916fed1d1ed9ecf201d2c1a3060c Mon Sep 17 00:00:00 2001 From: Jay Date: Mon, 26 Aug 2019 23:04:38 +0800 Subject: [PATCH 15/16] [bbc] Fix some tests --- youtube_dl/extractor/bbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 3f820eed2..901c5a54f 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -71,7 +71,7 @@ class BBCCoUkIE(InfoExtractor): 'info_dict': { 'id': 'b039d07m', 'ext': 'flv', - 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', + 'title': 'Kaleidoscope, Leonard Cohen', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { @@ -624,7 +624,7 @@ class BBCIE(BBCCoUkIE): 'url': 'http://www.bbc.com/news/world-europe-32668511', 'info_dict': { 'id': 'world-europe-32668511', - 'title': 'Russia stages massive WW2 parade despite Western boycott', + 'title': 'Russia stages massive WW2 parade', 'description': 'md5:00ff61976f6081841f759a08bf78cc9c', }, 'playlist_count': 2, From b500955a58efb019133b3214ea81635342728f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Aug 2019 01:58:07 +0700 Subject: [PATCH 16/16] [openload] Add support for oload.vip (closes #22205) --- youtube_dl/extractor/openload.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 030355257..ab4980d4d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,12 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'''(?x) + (?: + openload\.(?:co|io|link|pw)| + oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)| + oladblock\.(?:services|xyz|me)|openloed\.co) + ''' _VALID_URL = r'''(?x) https?:// (?P @@ -383,6 +388,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openloed.co/f/b8NWEgkqNLI/', 'only_matching': True, + }, { + 'url': 'https://oload.vip/f/kUEfGclsU9o', + 'only_matching': True, }] @classmethod