diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
index 4d3894ad3..4c75c8d5d 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.md
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
+- [ ] I've verified that I'm running youtube-dl version **2019.08.13**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2019.08.02
+ [debug] youtube-dl version 2019.08.13
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
index 796e11e54..8e8c43c47 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.md
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -19,7 +19,7 @@ labels: 'site-support-request'
- [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
+- [ ] I've verified that I'm running youtube-dl version **2019.08.13**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
index aa2348548..df719a29c 100644
--- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -18,13 +18,13 @@ title: ''
- [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
+- [ ] I've verified that I'm running youtube-dl version **2019.08.13**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
index 5b2501a65..3616db1a7 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.md
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
+- [ ] I've verified that I'm running youtube-dl version **2019.08.13**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2019.08.02
+ [debug] youtube-dl version 2019.08.13
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
index d1758a95c..0fa37aef1 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -19,13 +19,13 @@ labels: 'request'
- [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
+- [ ] I've verified that I'm running youtube-dl version **2019.08.13**
- [ ] I've searched the bugtracker for similar feature requests including closed ones
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cd9ccbe96..ac759ddc4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -339,6 +339,72 @@ Incorrect:
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
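
For illustration, a minimal standalone sketch of that conversion convention. The two helpers below are simplified stand-ins for `int_or_none`/`float_or_none` from `youtube_dl/utils.py` (the real functions also accept `scale`, `default` and related keyword arguments); the sample values are made up.

```python
def int_or_none(v, default=None):
    # Simplified stand-in: convert to int, return default instead of raising.
    try:
        return int(v)
    except (TypeError, ValueError):
        return default


def float_or_none(v, default=None):
    # Simplified stand-in: convert to float, return default instead of raising.
    try:
        return float(v)
    except (TypeError, ValueError):
        return default


# Typical extractor usage: never call int()/float() directly on scraped data.
duration = int_or_none('9840')   # -> 9840
bitrate = float_or_none(None)    # -> None rather than a TypeError
print(duration, bitrate)
```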
diff --git a/ChangeLog b/ChangeLog
index 7db147498..9b9e2e149 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+version 2019.08.13
+
+Core
+* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
+* [YoutubeDL] Check annotations availability (#18582)
+
+Extractors
+* [youtube:playlist] Improve flat extraction (#21927)
+* [youtube] Fix annotations extraction (#22045)
++ [discovery] Extract series meta field (#21808)
+* [youtube] Improve error detection (#16445)
+* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
++ [roosterteeth] Add support for watch URLs
+* [discovery] Limit video data by show slug (#21980)
+
+
version 2019.08.02
Extractors
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 3e832fec2..6a44bc7ba 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1783,6 +1783,8 @@ class YoutubeDL(object):
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index f2e5733b6..02f35459e 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
})
def _start_frag_download(self, ctx):
+ resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
# This dict stores the download progress, it's updated by the progress
# hook
state = {
'status': 'downloading',
- 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+ 'downloaded_bytes': resume_len,
'fragment_index': ctx['fragment_index'],
'fragment_count': total_frags,
'filename': ctx['filename'],
@@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
if not ctx['live']:
state['eta'] = self.calc_eta(
- start, time_now, estimated_size,
- state['downloaded_bytes'])
+ start, time_now, estimated_size - resume_len,
+ state['downloaded_bytes'] - resume_len)
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
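
The ETA fix above works by excluding the bytes that were already on disk before the resume from both the estimated total and the downloaded counter, so the transfer rate reflects only the current session. A minimal sketch of the arithmetic (the `calc_eta` below is a simplified stand-in for `FileDownloader.calc_eta`, and the sizes are made up):

```python
import time


def calc_eta(start, now, total, downloaded):
    # Simplified: rate = bytes fetched since `start` / elapsed seconds.
    elapsed = now - start
    if total is None or downloaded <= 0 or elapsed <= 0:
        return None
    rate = downloaded / float(elapsed)
    return (total - downloaded) / rate


MiB = 1024 * 1024
start = time.time() - 10                  # downloading for 10 seconds
resume_len = 50 * MiB                     # already on disk before resuming
estimated_size = 100 * MiB
downloaded_bytes = resume_len + 10 * MiB  # 10 MiB fetched this session

# Old behaviour: the 50 MiB from the previous session inflate the rate
# (6 MiB/s instead of 1 MiB/s), so the ETA comes out far too small.
eta_old = calc_eta(start, time.time(), estimated_size, downloaded_bytes)

# Fixed behaviour, as in the patch: measure only this session's progress.
eta_new = calc_eta(start, time.time(), estimated_size - resume_len,
                   downloaded_bytes - resume_len)

print('without resume correction: ~%.0f s' % eta_old)   # ~7 s
print('with resume correction: ~%.0f s' % eta_new)      # ~40 s
```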
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index e76507951..901c5a54f 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/|
+ sounds/play/|
events/[^/]+/play/[^/]+/
)
                        (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
- 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
+ 'title': 'Kaleidoscope, Leonard Cohen',
'description': 'The Canadian poet and songwriter reflects on his musical career.',
},
'params': {
@@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'm0007jz9',
+ 'ext': 'mp4',
+ 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
+ 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
+ 'duration': 9840,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
@@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE):
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
- 'title': 'Russia stages massive WW2 parade despite Western boycott',
+ 'title': 'Russia stages massive WW2 parade',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 6287ca685..6a2712cc5 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -94,6 +94,8 @@ class DiscoveryIE(DiscoveryGoBaseIE):
self._API_BASE_URL + 'content/videos',
display_id, 'Downloading content JSON metadata',
headers=headers, query={
+ 'embed': 'show.name',
+ 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
'slug': display_id,
'show_slug': show_slug,
})[0]
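
For context, the two new query parameters follow a common REST convention: `fields` trims the response to the attributes the extractor actually maps, and `embed=show.name` inlines the related show object so its name can be exposed as the series. A rough sketch of how the same query dict turns into a request URL (the base URL and slugs below are made up; the extractor itself passes this dict to `_download_json`):

```python
from urllib.parse import urlencode  # Python 3 stdlib

# Made-up values standing in for the extractor's runtime data.
api_base_url = 'https://api.example.com/'
display_id = 'some-episode'
show_slug = 'some-show'

query = {
    # Inline the related show object so show.name can become the series title.
    'embed': 'show.name',
    # Restrict the payload to the fields the extractor maps to metadata.
    'fields': 'authenticated,description.detailed,duration,episodeNumber,id,'
              'name,parental.rating,season.number,show,tags',
    'slug': display_id,
    'show_slug': show_slug,
}

print(api_base_url + 'content/videos?' + urlencode(query))
```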
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index 1fb00c9b0..4e0f8bc81 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -19,7 +19,7 @@ from ..utils import (
class EinthusanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com))/movie/watch/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://einthusan.tv/movie/watch/9097/',
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
@@ -36,6 +36,9 @@ class EinthusanIE(InfoExtractor):
}, {
'url': 'https://einthusan.com/movie/watch/9097/',
'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
+ 'only_matching': True,
}]
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index 030355257..ab4980d4d 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -243,7 +243,12 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor):
- _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
+ _DOMAINS = r'''(?x)
+ (?:
+ openload\.(?:co|io|link|pw)|
+ oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)|
+ oladblock\.(?:services|xyz|me)|openloed\.co)
+ '''
_VALID_URL = r'''(?x)
https?://
                        (?P<host>
@@ -383,6 +388,9 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://openloed.co/f/b8NWEgkqNLI/',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.vip/f/kUEfGclsU9o',
+ 'only_matching': True,
}]
@classmethod
diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py
index c0c276a50..401298cb8 100644
--- a/youtube_dl/extractor/piksel.py
+++ b/youtube_dl/extractor/piksel.py
@@ -18,15 +18,14 @@ class PikselIE(InfoExtractor):
    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
_TESTS = [
{
- 'url': 'http://player.piksel.com/v/nv60p12f',
- 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
+ 'url': 'http://player.piksel.com/v/ums2867l',
+ 'md5': '34e34c8d89dc2559976a6079db531e85',
'info_dict': {
- 'id': 'nv60p12f',
+ 'id': 'ums2867l',
'ext': 'mp4',
- 'title': 'فن الحياة - الحلقة 1',
- 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
- 'timestamp': 1465231790,
- 'upload_date': '20160606',
+ 'title': 'GX-005 with Caption',
+ 'timestamp': 1481335659,
+ 'upload_date': '20161210'
}
},
{
@@ -39,7 +38,7 @@ class PikselIE(InfoExtractor):
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
'timestamp': 1486171129,
- 'upload_date': '20170204',
+ 'upload_date': '20170204'
}
}
]
@@ -113,6 +112,13 @@ class PikselIE(InfoExtractor):
})
self._sort_formats(formats)
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ if caption_url:
+ subtitles.setdefault(caption.get('locale', 'en'), []).append({
+ 'url': caption_url})
+
return {
'id': video_id,
'title': title,
@@ -120,4 +126,5 @@ class PikselIE(InfoExtractor):
'thumbnail': video_data.get('thumbnailUrl'),
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
+ 'subtitles': subtitles,
}
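
The `subtitles` value assembled above uses youtube-dl's standard shape: a dict mapping language codes to lists of caption dicts, each with at least a `url`. A small standalone sketch of the same accumulation, with made-up caption data in place of `video_data.get('captions', [])`:

```python
# Made-up caption entries for illustration.
captions = [
    {'url': 'https://example.com/subs/en.vtt', 'locale': 'en'},
    {'url': 'https://example.com/subs/de.vtt', 'locale': 'de'},
    {'locale': 'fr'},  # no URL -> skipped
]

subtitles = {}
for caption in captions:
    caption_url = caption.get('url')
    if caption_url:
        # Group caption files by locale, defaulting to English.
        subtitles.setdefault(caption.get('locale', 'en'), []).append({
            'url': caption_url,
        })

print(subtitles)
# {'en': [{'url': 'https://example.com/subs/en.vtt'}],
#  'de': [{'url': 'https://example.com/subs/de.vtt'}]}
```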
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 8d4806794..bd9ee1647 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -68,9 +68,10 @@ class SafariBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login: %s' % credentials, expected=True)
- # oreilly serves two same groot_sessionid cookies in Set-Cookie header
- # and expects first one to be actually set
- self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
+ # oreilly serves two same instances of the following cookies
+ # in Set-Cookie header and expects first one to be actually set
+ for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+ self._apply_first_set_cookie_header(urlh, cookie)
_, urlh = self._download_webpage_handle(
auth.get('redirect_uri') or next_uri, None, 'Completing login',)
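
The loop above generalises the earlier single-cookie workaround: when the login response carries the same cookie twice in its Set-Cookie headers, only the first value must win. A standalone sketch of that first-occurrence-wins idea over made-up header data (the real `_apply_first_set_cookie_header` works on the raw response headers):

```python
# Made-up duplicated Set-Cookie values; the first occurrence is the valid one.
set_cookie_pairs = [
    ('groot_sessionid', 'good-session'),
    ('orm-jwt', 'good-jwt'),
    ('groot_sessionid', 'stale-session'),
    ('orm-rt', 'good-rt'),
    ('orm-rt', 'stale-rt'),
]

cookies = {}
for name, value in set_cookie_pairs:
    # setdefault keeps the first value seen for each cookie name.
    cookies.setdefault(name, value)

print(cookies)
# {'groot_sessionid': 'good-session', 'orm-jwt': 'good-jwt', 'orm-rt': 'good-rt'}
```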
diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py
index 823340776..54c7495cc 100644
--- a/youtube_dl/extractor/usanetwork.py
+++ b/youtube_dl/extractor/usanetwork.py
@@ -1,11 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .adobepass import AdobePassIE
from ..utils import (
- extract_attributes,
+ NO_DEFAULT,
smuggle_url,
update_url_query,
)
@@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- player_params = extract_attributes(self._search_regex(
- r'(]+data-usa-tve-player-container[^>]*>)', webpage, 'player params'))
- video_id = player_params['data-mpx-guid']
- title = player_params['data-episode-title']
+ def _x(name, default=NO_DEFAULT):
+ return self._search_regex(
+                r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+ webpage, name, default=default, group='value')
- account_pid, path = re.search(
- r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)',
- webpage).groups()
+ video_id = _x('mpx-guid')
+ title = _x('episode-title')
+ mpx_account_id = _x('mpx-account-id', '2304992029')
query = {
'mbr': 'true',
}
- if player_params.get('data-is-full-episode') == '1':
+ if _x('is-full-episode', None) == '1':
query['manifest'] = 'm3u'
- if player_params.get('data-entitlement') == 'auth':
+ if _x('is-entitlement', None) == '1':
adobe_pass = {}
drupal_settings = self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
@@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE):
adobe_pass = drupal_settings.get('adobePass', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'usa'),
- title, video_id, player_params.get('data-episode-rating', 'TV-14'))
+ title, video_id, _x('episode-rating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
@@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE):
info.update({
'_type': 'url_transparent',
'url': smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
+ 'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
query), {'force_smil_url': True}),
'id': video_id,
'title': title,
- 'series': player_params.get('data-show-title'),
+ 'series': _x('show-title', None),
'episode': title,
'ie_key': 'ThePlatform',
})
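
The `_x` helper introduced above swaps parsing one player tag's attributes for per-attribute regex lookups across the whole page. A standalone sketch of the same pattern using plain `re` (the HTML snippet and attribute values are made up; in the extractor, `NO_DEFAULT` comes from `youtube_dl/utils.py` and the lookup goes through `_search_regex`):

```python
import re

NO_DEFAULT = object()

# Made-up page fragment with the data-* attributes the extractor reads.
webpage = '''<div class="player"
    data-mpx-guid="1234567890"
    data-episode-title="Some Episode"
    data-is-full-episode="1"></div>'''


def _x(name, default=NO_DEFAULT):
    # Match data-<name> = "value" or 'value', tolerating whitespace around '='.
    m = re.search(
        r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name, webpage)
    if m:
        return m.group('value')
    if default is NO_DEFAULT:
        raise ValueError('unable to extract %s' % name)
    return default


print(_x('mpx-guid'))          # '1234567890'
print(_x('episode-title'))     # 'Some Episode'
print(_x('show-title', None))  # None - the attribute is absent
```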
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44740cb75..25d056b3c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
clean_html,
dict_get,
error_to_compat_str,
+ extract_attributes,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -324,17 +325,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(content):
yield self.url_result(video_id, 'Youtube', video_id, video_title)
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
- for mobj in re.finditer(self._VIDEO_RE, page):
+ def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+ for mobj in re.finditer(video_re, page):
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue
video_id = mobj.group('id')
- video_title = unescapeHTML(mobj.group('title'))
+ video_title = unescapeHTML(
+ mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip()
+ if video_title == '► Play all':
+ video_title = None
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
@@ -342,6 +344,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
@@ -379,8 +387,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:www\.)?invidious\.enkirton\.net/|
(?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/|
+ (?:www\.)?invidious\.nixnet\.xyz/|
(?:www\.)?tube\.poal\.co/|
(?:www\.)?vid\.wxzm\.sx/|
+ (?:www\.)?yt\.elukerio\.org/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
@@ -1595,17 +1605,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
- def _extract_annotations(self, video_id):
- return self._download_webpage(
- 'https://www.youtube.com/annotations_invideo', video_id,
- note='Downloading annotations',
- errnote='Unable to download video annotations', fatal=False,
- query={
- 'features': 1,
- 'legacy': 1,
- 'video_id': video_id,
- })
-
@staticmethod
def _extract_chapters(description, duration):
if not description:
@@ -1812,10 +1811,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
def extract_unavailable_message():
- return self._html_search_regex(
-            (r'(?s)<div[^>]+id=["\']unavailable-submessage["\'][^>]+>(.+?)</div>',
-             r'(?s)<h1[^>]+id=["\']unavailable-message["\'][^>]*>(.+?)</h1>'),
- video_webpage, 'unavailable message', default=None)
+ messages = []
+ for tag, kind in (('h1', 'message'), ('div', 'submessage')):
+ msg = self._html_search_regex(
+                r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
+ video_webpage, 'unavailable %s' % kind, default=None)
+ if msg:
+ messages.append(msg)
+ if messages:
+ return '\n'.join(messages)
if not video_info:
unavailable_message = extract_unavailable_message()
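
The rewritten helper collects both the headline and the sub-message instead of returning whichever single pattern matched first, which gives fuller error reporting for blocked or removed videos. A standalone sketch of the same loop against a fabricated error page:

```python
import re

# Fabricated "video unavailable" page fragment.
video_webpage = '''
<h1 id="unavailable-message">Video unavailable</h1>
<div id="unavailable-submessage">This video is private.</div>
'''

messages = []
for tag, kind in (('h1', 'message'), ('div', 'submessage')):
    m = re.search(
        r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(
            tag=tag, kind=kind),
        video_webpage)
    if m:
        messages.append(m.group(1).strip())

print('\n'.join(messages))
# Video unavailable
# This video is private.
```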
@@ -2277,7 +2281,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# annotations
video_annotations = None
if self._downloader.params.get('writeannotations', False):
- video_annotations = self._extract_annotations(video_id)
+ xsrf_token = self._search_regex(
+            r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+ video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+ invideo_url = try_get(
+ player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+ if xsrf_token and invideo_url:
+ xsrf_field_name = self._search_regex(
+                r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+ video_webpage, 'xsrf field name',
+ group='xsrf_field_name', default='session_token')
+ video_annotations = self._download_webpage(
+ self._proto_relative_url(invideo_url),
+ video_id, note='Downloading annotations',
+ errnote='Unable to download video annotations', fatal=False,
+ data=urlencode_postdata({xsrf_field_name: xsrf_token}))
chapters = self._extract_chapters(description_original, video_duration)
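
The replacement flow above no longer hits the removed `annotations_invideo` endpoint; it reads the annotation URL from the player response and POSTs the page's XSRF token to it. A rough standalone sketch of the two regex lookups involved (the inline-config snippet is fabricated; the real code uses `_search_regex`, `try_get` on `player_response` and `_download_webpage` with `urlencode_postdata`):

```python
import re

# Fabricated fragment of a watch page's inline player config.
video_webpage = '''
    "XSRF_TOKEN": "QUFFabc123+/=",
    "XSRF_FIELD_NAME": "session_token",
'''

xsrf_token = re.search(
    r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
    video_webpage).group('xsrf_token')

field_m = re.search(
    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
    video_webpage)
xsrf_field_name = field_m.group('xsrf_field_name') if field_m else 'session_token'

# The annotations XML is then fetched by POSTing {field_name: token} to the
# invideoUrl found under player_response['annotations'][0]
# ['playerAnnotationsUrlsRenderer']['invideoUrl'].
print({xsrf_field_name: xsrf_token})
# {'session_token': 'QUFFabc123+/='}
```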
@@ -2435,7 +2453,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist'
_TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -2600,6 +2619,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
def _real_initialize(self):
self._login()
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+
+ for item in re.findall(
+ r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+ attrs = extract_attributes(item)
+ video_id = attrs['data-video-id']
+ video_title = unescapeHTML(attrs.get('data-title'))
+ if video_title:
+ video_title = video_title.strip()
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+
+ # Fallback with old _VIDEO_RE
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+ # Relaxed fallbacks
+ self.extract_videos_from_page_impl(
+            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+ self.extract_videos_from_page_impl(
+            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+
+ return zip(ids_in_page, titles_in_page)
+
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
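
The new playlist parser above prefers the structured `data-video-id`/`data-title` attributes and only then falls back to the old href regex and two progressively looser patterns. A small sketch of the attribute-based step on a fabricated playlist fragment (the `extract_attributes` below is a simplified stand-in for the more robust helper in `youtube_dl/utils.py`):

```python
import re

# Fabricated playlist page fragment.
page = '''
<tr class="pl-video" data-video-id="BaW_jenozKc"
    data-title="youtube-dl test video"></tr>
<tr class="pl-video" data-video-id="dQw4w9WgXcQ" data-title="Another video"></tr>
'''


def extract_attributes(tag):
    # Simplified stand-in: turn attr="value" pairs of one HTML tag into a dict.
    return dict(re.findall(r'([a-z-]+)="([^"]*)"', tag))


ids_in_page, titles_in_page = [], []
for item in re.findall(
        r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
    attrs = extract_attributes(item)
    ids_in_page.append(attrs['data-video-id'])
    titles_in_page.append((attrs.get('data-title') or '').strip() or None)

print(list(zip(ids_in_page, titles_in_page)))
# [('BaW_jenozKc', 'youtube-dl test video'), ('dQw4w9WgXcQ', 'Another video')]
```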
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0f7fdb23d..b53a08cae 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2019.08.02'
+__version__ = '2019.08.13'