1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-01-10 07:07:55 +01:00

Merge remote-tracking branch 'upstream/master' into fix/niconico

This commit is contained in:
tsukumijima 2020-03-07 14:48:30 +09:00
commit b15e0fe056
38 changed files with 381 additions and 186 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.02.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2020.02.16** - [ ] I've verified that I'm running youtube-dl version **2020.03.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.02.16 [debug] youtube-dl version 2020.03.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.02.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2020.02.16** - [ ] I've verified that I'm running youtube-dl version **2020.03.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.02.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2020.02.16** - [ ] I've verified that I'm running youtube-dl version **2020.03.06**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.02.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2020.02.16** - [ ] I've verified that I'm running youtube-dl version **2020.03.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.02.16 [debug] youtube-dl version 2020.03.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.02.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2020.02.16** - [ ] I've verified that I'm running youtube-dl version **2020.03.06**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,35 @@
version 2020.03.06
Extractors
* [youtube] Fix age-gated videos support without login (#24248)
* [vimeo] Fix showcase password protected video extraction (#24224)
* [pornhub] Improve title extraction (#24184)
* [peertube] Improve extraction (#23657)
+ [servus] Add support for new URL schema (#23475, #23583, #24142)
* [vimeo] Fix subtitles URLs (#24209)
version 2020.03.01
Core
* [YoutubeDL] Force redirect URL to unicode on python 2
- [options] Remove duplicate short option -v for --version (#24162)
Extractors
* [xhamster] Fix extraction (#24205)
* [franceculture] Fix extraction (#24204)
+ [telecinco] Add support for article opening videos
* [telecinco] Fix extraction (#24195)
* [xtube] Fix metadata extraction (#21073, #22455)
* [youjizz] Fix extraction (#24181)
- Remove no longer needed compat_str around geturl
* [pornhd] Fix extraction (#24128)
+ [teachable] Add support for multiple videos per lecture (#24101)
+ [wistia] Add support for multiple generic embeds (#8347, #11385)
* [imdb] Fix extraction (#23443)
* [tv2dk:bornholm:play] Fix extraction (#24076)
version 2020.02.16 version 2020.02.16
Core Core

View File

@ -26,7 +26,6 @@ from youtube_dl.extractor import (
ThePlatformIE, ThePlatformIE,
ThePlatformFeedIE, ThePlatformFeedIE,
RTVEALaCartaIE, RTVEALaCartaIE,
FunnyOrDieIE,
DemocracynowIE, DemocracynowIE,
) )
@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
IE = FunnyOrDieIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
class TestDemocracynowSubtitles(BaseTestSubtitles): class TestDemocracynowSubtitles(BaseTestSubtitles):
url = 'http://www.democracynow.org/shows/2015/7/3' url = 'http://www.democracynow.org/shows/2015/7/3'
IE = DemocracynowIE IE = DemocracynowIE

View File

@ -92,6 +92,7 @@ from .utils import (
YoutubeDLCookieJar, YoutubeDLCookieJar,
YoutubeDLCookieProcessor, YoutubeDLCookieProcessor,
YoutubeDLHandler, YoutubeDLHandler,
YoutubeDLRedirectHandler,
) )
from .cache import Cache from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@ -2343,6 +2344,7 @@ class YoutubeDL(object):
debuglevel = 1 if self.params.get('debug_printtraffic') else 0 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
data_handler = compat_urllib_request_DataHandler() data_handler = compat_urllib_request_DataHandler()
# When passing our own FileHandler instance, build_opener won't add the # When passing our own FileHandler instance, build_opener won't add the
@ -2356,7 +2358,7 @@ class YoutubeDL(object):
file_handler.file_open = file_open file_handler.file_open = file_open
opener = compat_urllib_request.build_opener( opener = compat_urllib_request.build_opener(
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in # Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play # cases where our custom HTTP handler doesn't come into play

View File

@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
encode_base_n, encode_base_n,
ExtractorError, ExtractorError,
@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, display_id) webpage, urlh = self._download_webpage_handle(url, display_id)
video_id = self._match_id(compat_str(urlh.geturl())) video_id = self._match_id(urlh.geturl())
hash = self._search_regex( hash = self._search_regex(
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')

View File

@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_data = extract_attributes(self._search_regex( video_data = extract_attributes(self._search_regex(
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)', r'''(?sx)
(?:
</h1>|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
).*?
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
''',
webpage, 'video data')) webpage, 'video data'))
video_url = video_data['data-asset-source'] video_url = video_data['data-asset-source']

View File

@ -2287,7 +2287,7 @@ class GenericIE(InfoExtractor):
if head_response is not False: if head_response is not False:
# Check for redirect # Check for redirect
new_url = compat_str(head_response.geturl()) new_url = head_response.geturl()
if url != new_url: if url != new_url:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)
if force_videoid: if force_videoid:
@ -2387,12 +2387,12 @@ class GenericIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
self._parse_xspf( self._parse_xspf(
doc, video_id, xspf_url=url, doc, video_id, xspf_url=url,
xspf_base_url=compat_str(full_response.geturl())), xspf_base_url=full_response.geturl()),
video_id) video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, doc,
mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url) mpd_url=url)
self._sort_formats(info_dict['formats']) self._sort_formats(info_dict['formats'])
return info_dict return info_dict
@ -2537,14 +2537,15 @@ class GenericIE(InfoExtractor):
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
# Look for embedded Wistia player # Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage) wistia_urls = WistiaIE._extract_urls(webpage)
if wistia_url: if wistia_urls:
return { playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
for entry in playlist['entries']:
entry.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': self._proto_relative_url(wistia_url),
'ie_key': WistiaIE.ie_key(),
'uploader': video_uploader, 'uploader': video_uploader,
} })
return playlist
# Look for SVT player # Look for SVT player
svt_url = SVTIE._extract_url(webpage) svt_url = SVTIE._extract_url(webpage)

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -8,6 +10,7 @@ from ..utils import (
mimetype2ext, mimetype2ext,
parse_duration, parse_duration,
qualities, qualities,
try_get,
url_or_none, url_or_none,
) )
@ -15,15 +18,16 @@ from ..utils import (
class ImdbIE(InfoExtractor): class ImdbIE(InfoExtractor):
IE_NAME = 'imdb' IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers' IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)' _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
'info_dict': { 'info_dict': {
'id': '2524815897', 'id': '2524815897',
'ext': 'mp4', 'ext': 'mp4',
'title': 'No. 2 from Ice Age: Continental Drift (2012)', 'title': 'No. 2',
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7', 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
'duration': 152,
} }
}, { }, {
'url': 'http://www.imdb.com/video/_/vi2524815897', 'url': 'http://www.imdb.com/video/_/vi2524815897',
@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.imdb.com/videoplayer/vi' + video_id, video_id) data = self._download_json(
video_metadata = self._parse_json(self._search_regex( 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage, query={
'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id] 'key': base64.b64encode(json.dumps({
title = self._html_search_meta( 'type': 'VIDEO_PLAYER',
['og:title', 'twitter:title'], webpage) or self._html_search_regex( 'subType': 'FORCE_LEGACY',
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title'] 'id': 'vi%s' % video_id,
}).encode()).decode(),
})[0]
quality = qualities(('SD', '480p', '720p', '1080p')) quality = qualities(('SD', '480p', '720p', '1080p'))
formats = [] formats = []
for encoding in video_metadata.get('encodings', []): for encoding in data['videoLegacyEncodings']:
if not encoding or not isinstance(encoding, dict): if not encoding or not isinstance(encoding, dict):
continue continue
video_url = url_or_none(encoding.get('videoUrl')) video_url = url_or_none(encoding.get('url'))
if not video_url: if not video_url:
continue continue
ext = mimetype2ext(encoding.get( ext = mimetype2ext(encoding.get(
@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) preference=1, m3u8_id='hls', fatal=False))
continue continue
format_id = encoding.get('definition') format_id = encoding.get('definition')
formats.append({ formats.append({
@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
webpage = self._download_webpage(
'https://www.imdb.com/video/vi' + video_id, video_id)
video_metadata = self._parse_json(self._search_regex(
r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
'video metadata'), video_id)
video_info = video_metadata.get('VIDEO_INFO')
if video_info and isinstance(video_info, dict):
info = try_get(
video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
else:
info = {}
title = self._html_search_meta(
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'title',
default=None) or info['videoTitle']
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'alt_title': info.get('videoSubTitle'),
'formats': formats, 'formats': formats,
'description': video_metadata.get('description'), 'description': info.get('videoDescription'),
'thumbnail': video_metadata.get('slate', {}).get('url'), 'thumbnail': url_or_none(try_get(
'duration': parse_duration(video_metadata.get('duration')), video_metadata, lambda x: x['videoSlate']['source'])),
'duration': parse_duration(info.get('videoRuntime')),
} }

View File

@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
self._LOGIN_URL, None, 'Downloading login popup') self._LOGIN_URL, None, 'Downloading login popup')
def is_logged(url_handle): def is_logged(url_handle):
return self._LOGIN_URL not in compat_str(url_handle.geturl()) return self._LOGIN_URL not in url_handle.geturl()
# Already logged in # Already logged in
if is_logged(urlh): if is_logged(urlh):

View File

@ -8,7 +8,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode, compat_b64decode,
compat_HTTPError, compat_HTTPError,
compat_str,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
'sso': 'true', 'sso': 'true',
}) })
login_state_url = compat_str(urlh.geturl()) login_state_url = urlh.geturl()
try: try:
login_page = self._download_webpage( login_page = self._download_webpage(
@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
}) })
access_token = self._search_regex( access_token = self._search_regex(
r'access_token=([^=&]+)', compat_str(urlh.geturl()), r'access_token=([^=&]+)', urlh.geturl(),
'access token') 'access token')
self._download_webpage( self._download_webpage(

View File

@ -6,7 +6,6 @@ import re
from .theplatform import ThePlatformBaseIE from .theplatform import ThePlatformBaseIE
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
continue continue
urlh = ie._request_webpage( urlh = ie._request_webpage(
embed_url, video_id, note='Following embed URL redirect') embed_url, video_id, note='Following embed URL redirect')
embed_url = compat_str(urlh.geturl()) embed_url = urlh.geturl()
program_guid = _program_guid(_qs(embed_url)) program_guid = _program_guid(_qs(embed_url))
if program_guid: if program_guid:
entries.append(embed_url) entries.append(embed_url)

View File

@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
query = mobj.group('query') query = mobj.group('query')
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer? webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
redirect_url = compat_str(urlh.geturl()) redirect_url = urlh.geturl()
# XXX: might have also extracted UrlReferrer and QueryString from the html # XXX: might have also extracted UrlReferrer and QueryString from the html
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex( service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(

View File

@ -31,7 +31,7 @@ class NhkVodIE(InfoExtractor):
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True, 'only_matching': True,
}] }]
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json' _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
def _real_extract(self, url): def _real_extract(self, url):
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()

View File

@ -8,6 +8,7 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_resolution, parse_resolution,
str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
@ -415,6 +416,7 @@ class PeerTubeIE(InfoExtractor):
peertube\.cpy\.re peertube\.cpy\.re
)''' )'''
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
peertube:(?P<host>[^:]+):| peertube:(?P<host>[^:]+):|
@ -423,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
(?P<id>%s) (?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE) ''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{ _TESTS = [{
'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '80f24ff364cc9d333529506a263e7feb', 'md5': '9bed8c0137913e17b86334e5885aacff',
'info_dict': { 'info_dict': {
'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4', 'ext': 'mp4',
'title': 'wow', 'title': 'What is PeerTube?',
'description': 'wow such video, so gif', 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'thumbnail': r're:https?://.*\.(?:jpg|png)', 'thumbnail': r're:https?://.*\.(?:jpg|png)',
'timestamp': 1519297480, 'timestamp': 1538391166,
'upload_date': '20180222', 'upload_date': '20181001',
'uploader': 'Luclu7', 'uploader': 'Framasoft',
'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1', 'uploader_id': '3',
'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7', 'uploader_url': 'https://framatube.org/accounts/framasoft',
'license': 'Unknown', 'channel': 'Les vidéos de Framasoft',
'duration': 3, 'channel_id': '2',
'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
'language': 'en',
'license': 'Attribution - Share Alike',
'duration': 113,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'tags': list, 'tags': ['framasoft', 'peertube'],
'categories': list, 'categories': ['Science & Technology'],
} }
}, { }, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
@ -484,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
entries = [peertube_url] entries = [peertube_url]
return entries return entries
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
    """Download one JSON resource belonging to a video from an instance API.

    The request URL is produced by filling ``_API_BASE`` with ``host``,
    ``video_id`` and ``path`` (an empty ``path`` leaves a trailing slash).
    ``note``, ``errnote`` and ``fatal`` are handed to ``_download_json``
    unchanged, and its result is returned as-is.
    """
    api_url = self._API_BASE % (host, video_id, path)
    return self._download_json(
        api_url, video_id, note=note, errnote=errnote, fatal=fatal)
def _get_subtitles(self, host, video_id):
    """Collect the caption tracks of a video, grouped by language id.

    Returns a dict mapping a language id to a list of ``{'url': ...}``
    entries, or None when the captions response is unavailable or is not
    shaped as ``{'data': [...]}``.
    """
    # Captions are optional: the request is non-fatal, so anything other
    # than the expected dict means "no subtitles" rather than an error.
    captions = self._call_api(
        host, video_id, 'captions', note='Downloading captions JSON',
        fatal=False)
    if not isinstance(captions, dict):
        return
    data = captions.get('data')
    if not isinstance(data, list):
        return
    subtitles = {}
    for e in data:
        # A malformed (non-dict) entry must not abort the whole extraction;
        # without this guard e.get() below would raise AttributeError.
        if not isinstance(e, dict):
            continue
        language_id = try_get(e, lambda x: x['language']['id'], compat_str)
        caption_url = urljoin('https://%s' % host, e.get('captionPath'))
        if not caption_url:
            continue
        # Entries without a usable language id are filed under 'en'.
        subtitles.setdefault(language_id or 'en', []).append({
            'url': caption_url,
        })
    return subtitles
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') or mobj.group('host_2') host = mobj.group('host') or mobj.group('host_2')
video_id = mobj.group('id') video_id = mobj.group('id')
video = self._download_json( video = self._call_api(
'https://%s/api/v1/videos/%s' % (host, video_id), video_id) host, video_id, '', note='Downloading video JSON')
title = video['name'] title = video['name']
@ -513,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
def account_data(field): full_description = self._call_api(
return try_get(video, lambda x: x['account'][field], compat_str) host, video_id, 'description', note='Downloading description JSON',
fatal=False)
category = try_get(video, lambda x: x['category']['label'], compat_str) description = None
if isinstance(full_description, dict):
description = str_or_none(full_description.get('description'))
if not description:
description = video.get('description')
subtitles = self.extract_subtitles(host, video_id)
def data(section, field, type_):
return try_get(video, lambda x: x[section][field], type_)
def account_data(field, type_):
return data('account', field, type_)
def channel_data(field, type_):
return data('channel', field, type_)
category = data('category', 'label', compat_str)
categories = [category] if category else None categories = [category] if category else None
nsfw = video.get('nsfw') nsfw = video.get('nsfw')
@ -528,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': video.get('description'), 'description': description,
'thumbnail': urljoin(url, video.get('thumbnailPath')), 'thumbnail': urljoin(url, video.get('thumbnailPath')),
'timestamp': unified_timestamp(video.get('publishedAt')), 'timestamp': unified_timestamp(video.get('publishedAt')),
'uploader': account_data('displayName'), 'uploader': account_data('displayName', compat_str),
'uploader_id': account_data('uuid'), 'uploader_id': str_or_none(account_data('id', int)),
'uploder_url': account_data('url'), 'uploader_url': url_or_none(account_data('url', compat_str)),
'license': try_get( 'channel': channel_data('displayName', compat_str),
video, lambda x: x['licence']['label'], compat_str), 'channel_id': str_or_none(channel_data('id', int)),
'channel_url': url_or_none(channel_data('url', compat_str)),
'language': data('language', 'id', compat_str),
'license': data('licence', 'label', compat_str),
'duration': int_or_none(video.get('duration')), 'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')), 'view_count': int_or_none(video.get('views')),
'like_count': int_or_none(video.get('likes')), 'like_count': int_or_none(video.get('likes')),
@ -544,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
'tags': try_get(video, lambda x: x['tags'], list), 'tags': try_get(video, lambda x: x['tags'], list),
'categories': categories, 'categories': categories,
'formats': formats, 'formats': formats,
'subtitles': subtitles
} }

View File

@ -46,7 +46,7 @@ class PlatziBaseIE(InfoExtractor):
headers={'Referer': self._LOGIN_URL}) headers={'Referer': self._LOGIN_URL})
# login succeeded # login succeeded
if 'platzi.com/login' not in compat_str(urlh.geturl()): if 'platzi.com/login' not in urlh.geturl():
return return
login_error = self._webpage_read_content( login_error = self._webpage_read_content(

View File

@ -8,6 +8,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts,
urljoin, urljoin,
) )
@ -27,23 +28,22 @@ class PornHdIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'age_limit': 18, 'age_limit': 18,
} },
'skip': 'HTTP Error 404: Not Found',
}, { }, {
# removed video
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'md5': '956b8ca569f7f4d8ec563e2c41598441', 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
'info_dict': { 'info_dict': {
'id': '1962', 'id': '1962',
'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sierra loves doing laundry', 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'Not available anymore',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -61,7 +61,13 @@ class PornHdIE(InfoExtractor):
r"(?s)sources'?\s*[:=]\s*(\{.+?\})", r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
webpage, 'sources', default='{}')), video_id) webpage, 'sources', default='{}')), video_id)
info = {}
if not sources: if not sources:
entries = self._parse_html5_media_entries(url, webpage, video_id)
if entries:
info = entries[0]
if not sources and not info:
message = self._html_search_regex( message = self._html_search_regex(
r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
webpage, 'error message', group='value') webpage, 'error message', group='value')
@ -80,23 +86,29 @@ class PornHdIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
'height': height, 'height': height,
}) })
self._sort_formats(formats) if formats:
info['formats'] = formats
self._sort_formats(info['formats'])
description = self._html_search_regex( description = self._html_search_regex(
r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1', (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
webpage, 'description', fatal=False, group='value') r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
webpage, 'description', fatal=False,
group='value') or self._html_search_meta(
'description', webpage, default=None) or self._og_search_description(webpage)
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*<', webpage, 'view count', fatal=False)) r'(\d+) views\s*<', webpage, 'view count', fatal=False))
thumbnail = self._search_regex( thumbnail = self._search_regex(
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
'thumbnail', fatal=False, group='url') 'thumbnail', default=None, group='url')
like_count = int_or_none(self._search_regex( like_count = int_or_none(self._search_regex(
(r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes', (r'(\d+)</span>\s*likes',
r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
r'class=["\']save-count["\'][^>]*>\s*(\d+)'), r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
webpage, 'like count', fatal=False)) webpage, 'like count', fatal=False))
return { return merge_dicts(info, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -106,4 +118,4 @@ class PornHdIE(InfoExtractor):
'like_count': like_count, 'like_count': like_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
} })

View File

@ -189,10 +189,10 @@ class PornHubIE(PornHubBaseIE):
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore. # on that anymore.
title = self._html_search_meta( title = self._html_search_meta(
'twitter:title', webpage, default=None) or self._search_regex( 'twitter:title', webpage, default=None) or self._html_search_regex(
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title') webpage, 'title', group='title')
video_urls = [] video_urls = []

View File

@ -8,7 +8,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@ -39,13 +38,13 @@ class SafariBaseIE(InfoExtractor):
'Downloading login page') 'Downloading login page')
def is_logged(urlh): def is_logged(urlh):
return 'learning.oreilly.com/home/' in compat_str(urlh.geturl()) return 'learning.oreilly.com/home/' in urlh.geturl()
if is_logged(urlh): if is_logged(urlh):
self.LOGGED_IN = True self.LOGGED_IN = True
return return
redirect_url = compat_str(urlh.geturl()) redirect_url = urlh.geturl()
parsed_url = compat_urlparse.urlparse(redirect_url) parsed_url = compat_urlparse.urlparse(redirect_url)
qs = compat_parse_qs(parsed_url.query) qs = compat_parse_qs(parsed_url.query)
next_uri = compat_urlparse.urljoin( next_uri = compat_urlparse.urljoin(

View File

@ -7,9 +7,18 @@ from .common import InfoExtractor
class ServusIE(InfoExtractor): class ServusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
servustv\.com/videos
)
/(?P<id>[aA]{2}-\w+|\d+-\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', # new URL schema
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
'info_dict': { 'info_dict': {
'id': 'AA-1T6VBU5PW1W12', 'id': 'AA-1T6VBU5PW1W12',
@ -18,6 +27,10 @@ class ServusIE(InfoExtractor):
'description': 'md5:1247204d85783afe3682644398ff2ec4', 'description': 'md5:1247204d85783afe3682644398ff2ec4',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
} }
}, {
# old URL schema
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
'only_matching': True,
}, { }, {
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
'only_matching': True, 'only_matching': True,

View File

@ -4,7 +4,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .wistia import WistiaIE from .wistia import WistiaIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
@ -58,7 +57,7 @@ class TeachableBaseIE(InfoExtractor):
self._logged_in = True self._logged_in = True
return return
login_url = compat_str(urlh.geturl()) login_url = urlh.geturl()
login_form = self._hidden_inputs(login_page) login_form = self._hidden_inputs(login_page)
@ -160,8 +159,8 @@ class TeachableIE(TeachableBaseIE):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
wistia_url = WistiaIE._extract_url(webpage) wistia_urls = WistiaIE._extract_urls(webpage)
if not wistia_url: if not wistia_urls:
if any(re.search(p, webpage) for p in ( if any(re.search(p, webpage) for p in (
r'class=["\']lecture-contents-locked', r'class=["\']lecture-contents-locked',
r'>\s*Lecture contents locked', r'>\s*Lecture contents locked',
@ -174,12 +173,14 @@ class TeachableIE(TeachableBaseIE):
title = self._og_search_title(webpage, default=None) title = self._og_search_title(webpage, default=None)
return { entries = [{
'_type': 'url_transparent', '_type': 'url_transparent',
'url': wistia_url, 'url': wistia_url,
'ie_key': WistiaIE.ie_key(), 'ie_key': WistiaIE.ie_key(),
'title': title, 'title': title,
} } for wistia_url in wistia_urls]
return self.playlist_result(entries, video_id, title)
class TeachableCourseIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE):

View File

@ -11,6 +11,7 @@ from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get,
urljoin, urljoin,
) )
@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1876350223', 'id': '1876350223',
'title': 'Bacalao con kokotxas al pil-pil', 'title': 'Bacalao con kokotxas al pil-pil',
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
}, },
'playlist': [{ 'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7', 'md5': 'adb28c37238b675dad0f042292f209a7',
@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
'duration': 50, 'duration': 50,
}, },
}, {
# video in opening's content
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
'info_dict': {
'id': '2907195140',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
'info_dict': {
'id': 'TpI2EttSDAReWpJ1o0NVh2',
'ext': 'mp4',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'duration': 1015,
},
}],
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True, 'only_matching': True,
@ -135,16 +156,27 @@ class TelecincoIE(InfoExtractor):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article = self._parse_json(self._search_regex( article = self._parse_json(self._search_regex(
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})', r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
webpage, 'article'), display_id)['article'] webpage, 'article'), display_id)['article']
title = article.get('title') title = article.get('title')
description = clean_html(article.get('leadParagraph')) description = clean_html(article.get('leadParagraph')) or ''
if article.get('editorialType') != 'VID': if article.get('editorialType') != 'VID':
entries = [] entries = []
for p in article.get('body', []): body = [article.get('opening')]
content = p.get('content') body.extend(try_get(article, lambda x: x['body'], list) or [])
if p.get('type') != 'video' or not content: for p in body:
if not isinstance(p, dict):
continue continue
content = p.get('content')
if not content:
continue
type_ = p.get('type')
if type_ == 'paragraph':
content_str = str_or_none(content)
if content_str:
description += content_str
continue
if type_ == 'video' and isinstance(content, dict):
entries.append(self._parse_content(content, url)) entries.append(self._parse_content(content, url))
return self.playlist_result( return self.playlist_result(
entries, str_or_none(article.get('id')), title, description) entries, str_or_none(article.get('id')), title, description)

View File

@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id) webpage, urlh = self._download_webpage_handle(url, video_id)
redirect_url = compat_str(urlh.geturl()) redirect_url = urlh.geturl()
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'): if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
raise ExtractorError( raise ExtractorError(
'This Tumblr may contain sensitive media. ' 'This Tumblr may contain sensitive media. '

View File

@ -106,7 +106,7 @@ class TV2DKBornholmPlayIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._download_json( video = self._download_json(
'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
data=json.dumps({ data=json.dumps({
'playlist_id': video_id, 'playlist_id': video_id,
'serienavn': '', 'serienavn': '',

View File

@ -33,6 +33,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
urlencode_postdata, urlencode_postdata,
urljoin,
unescapeHTML, unescapeHTML,
) )
@ -191,7 +192,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
for tt in text_tracks: for tt in text_tracks:
subtitles[tt['lang']] = [{ subtitles[tt['lang']] = [{
'ext': 'vtt', 'ext': 'vtt',
'url': 'https://vimeo.com' + tt['url'], 'url': urljoin('https://vimeo.com', tt['url']),
}] }]
thumbnails = [] thumbnails = []
@ -584,14 +585,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
elif is_player: elif is_player:
url = 'https://player.vimeo.com/video/' + video_id url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf', '/album/', '/showcase/')):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
try: try:
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
url, video_id, headers=headers) url, video_id, headers=headers)
redirect_url = compat_str(urlh.geturl()) redirect_url = urlh.geturl()
except ExtractorError as ee: except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read() errmsg = ee.cause.read()

View File

@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
# https://wistia.com/support/embed-and-share/video-on-your-website # https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
match = re.search( urls = WistiaIE._extract_urls(webpage)
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage) return urls[0] if urls else None
if match:
return unescapeHTML(match.group('url'))
match = re.search( @staticmethod
def _extract_urls(webpage):
urls = []
for match in re.finditer(
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
urls.append(unescapeHTML(match.group('url')))
for match in re.finditer(
r'''(?sx) r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
''', webpage) ''', webpage):
if match: urls.append('wistia:%s' % match.group('id'))
return 'wistia:%s' % match.group('id') for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
urls.append('wistia:%s' % match.group('id'))
match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage) return urls
if match:
return 'wistia:%s' % match.group('id')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -113,7 +113,7 @@ class XHamsterIE(InfoExtractor):
display_id = mobj.group('display_id') or mobj.group('display_id_2') display_id = mobj.group('display_id') or mobj.group('display_id_2')
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
webpage = self._download_webpage(desktop_url, video_id) webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
error = self._html_search_regex( error = self._html_search_regex(
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
@ -161,6 +161,9 @@ class XHamsterIE(InfoExtractor):
'ext': determine_ext(format_url, 'mp4'), 'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality), 'height': get_height(quality),
'filesize': filesize, 'filesize': filesize,
'http_headers': {
'Referer': urlh.geturl(),
},
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
'display_id': 'A-Super-Run-Part-1-YT', 'display_id': 'A-Super-Run-Part-1-YT',
'ext': 'flv', 'ext': 'flv',
'title': 'A Super Run - Part 1 (YT)', 'title': 'A Super Run - Part 1 (YT)',
'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93', 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
'uploader': 'tshirtguy59', 'uploader': 'tshirtguy59',
'duration': 579, 'duration': 579,
'view_count': int, 'view_count': int,
@ -87,6 +87,20 @@ class XTubeIE(InfoExtractor):
'Cookie': 'age_verified=1; cookiesAccepted=1', 'Cookie': 'age_verified=1; cookiesAccepted=1',
}) })
title, thumbnail, duration = [None] * 3
config = self._parse_json(self._search_regex(
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
default='{}'), video_id, transform_source=js_to_json, fatal=False)
if config:
config = config.get('mainRoll')
if isinstance(config, dict):
title = config.get('title')
thumbnail = config.get('poster')
duration = int_or_none(config.get('duration'))
sources = config.get('sources')
if isinstance(sources, dict):
sources = self._parse_json(self._search_regex( sources = self._parse_json(self._search_regex(
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
webpage, 'sources', group='sources'), video_id, webpage, 'sources', group='sources'), video_id,
@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
self._sort_formats(formats) self._sort_formats(formats)
if not title:
title = self._search_regex( title = self._search_regex(
(r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
webpage, 'title', group='title') webpage, 'title', group='title')
description = self._search_regex( description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'twitter:description', webpage, default=None) or self._search_regex(
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
uploader = self._search_regex( uploader = self._search_regex(
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
r'<span[^>]+class="nickname"[^>]*>([^<]+)'), r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
if not duration:
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>', r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>', (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r'>Comments? \(([\d,\.]+)\)<', r'>Comments? \(([\d,\.]+)\)<',
@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
'id': 'greenshowers-4056496', 'id': 'greenshowers-4056496',
'age_limit': 18, 'age_limit': 18,
}, },
'playlist_mincount': 155, 'playlist_mincount': 154,
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
encodings = self._parse_json( encodings = self._parse_json(
self._search_regex( self._search_regex(
r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
default='[]'), default='[]'),
video_id, fatal=False) video_id, fatal=False)
for encoding in encodings: for encoding in encodings:

View File

@ -570,7 +570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20120506', 'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)', 'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8', 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'], 'iconic ep', 'iconic', 'love', 'it'],
@ -685,12 +685,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM', 'id': 'nfWlot6h_JM',
'ext': 'm4a', 'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off', 'title': 'Taylor Swift - Shake It Off',
'description': 'md5:bec2185232c05479482cb5a9b82719bf', 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
'duration': 242, 'duration': 242,
'uploader': 'TaylorSwiftVEVO', 'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818', 'upload_date': '20140818',
'creator': 'Taylor Swift',
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
@ -755,11 +754,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20100430', 'upload_date': '20100430',
'uploader_id': 'deadmau5', 'uploader_id': 'deadmau5',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
'creator': 'deadmau5', 'creator': 'Dada Life, deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360', 'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5', 'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)', 'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords', 'alt_title': 'This Machine Kills Some Chords',
}, },
'expected_warnings': [ 'expected_warnings': [
'DASH manifest missing', 'DASH manifest missing',
@ -1135,6 +1134,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
'youtube_include_dash_manifest': False, 'youtube_include_dash_manifest': False,
}, },
'skip': 'not actual anymore',
}, },
{ {
# Youtube Music Auto-generated description # Youtube Music Auto-generated description
@ -1145,8 +1145,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Voyeur Girl', 'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312', 'upload_date': '20190312',
'uploader': 'Various Artists - Topic', 'uploader': 'Stephen - Topic',
'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'artist': 'Stephen', 'artist': 'Stephen',
'track': 'Voyeur Girl', 'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear', 'album': 'it\'s too much love to know my dear',
@ -1210,7 +1210,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': '-hcAI0g-f5M', 'id': '-hcAI0g-f5M',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Put It On Me', 'title': 'Put It On Me',
'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
'upload_date': '20180426', 'upload_date': '20180426',
'uploader': 'Matt Maeson - Topic', 'uploader': 'Matt Maeson - Topic',
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
@ -1256,7 +1256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_signature_function(self, video_id, player_url, example_sig): def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match( id_m = re.match(
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
player_url) player_url)
if not id_m: if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url) raise ExtractorError('Cannot identify player %r' % player_url)
@ -2035,7 +2035,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
player_version = self._search_regex( player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url, player_url,
'html5 player', fatal=False) 'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version player_desc = 'html5 player %s' % player_version
@ -2495,20 +2495,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})' _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
'title': 'ytdl test PL', 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', 'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'title': 'youtube-dl public playlist',
}, },
'playlist_count': 3, 'playlist_count': 1,
}, { }, {
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': { 'info_dict': {
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'title': 'YDL_Empty_List', 'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'title': 'youtube-dl empty playlist',
}, },
'playlist_count': 0, 'playlist_count': 0,
'skip': 'This playlist is private',
}, { }, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
@ -2518,7 +2521,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'uploader': 'Christiaan008', 'uploader': 'Christiaan008',
'uploader_id': 'ChRiStIaAn008', 'uploader_id': 'ChRiStIaAn008',
}, },
'playlist_count': 95, 'playlist_count': 96,
}, { }, {
'note': 'issue #673', 'note': 'issue #673',
'url': 'PLBB231211A4F62143', 'url': 'PLBB231211A4F62143',

View File

@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
'timestamp': 1359044972, 'timestamp': 1359044972,
'upload_date': '20130124', 'upload_date': '20130124',
'view_count': int, 'view_count': int,
'comment_count': int,
}, },
}, },
{ {

View File

@ -244,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
'id': 'das-aktuelle-sportstudio', 'id': 'das-aktuelle-sportstudio',
'title': 'das aktuelle sportstudio | ZDF', 'title': 'das aktuelle sportstudio | ZDF',
}, },
'playlist_count': 21, 'playlist_mincount': 23,
}, { }, {
'url': 'https://www.zdf.de/dokumentation/planet-e', 'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': { 'info_dict': {
'id': 'planet-e', 'id': 'planet-e',
'title': 'planet e.', 'title': 'planet e.',
}, },
'playlist_count': 4, 'playlist_mincount': 50,
}, { }, {
'url': 'https://www.zdf.de/filme/taunuskrimi/', 'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True, 'only_matching': True,

View File

@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
action='help', action='help',
help='Print this help text and exit') help='Print this help text and exit')
general.add_option( general.add_option(
'-v', '--version', '--version',
action='version', action='version',
help='Print program version and exit') help='Print program version and exit')
general.add_option( general.add_option(

View File

@ -2795,6 +2795,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
https_response = http_response https_response = http_response
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect target on Python 2.

    The override below is only defined when running on a major Python
    version lower than 3; otherwise the parent handler is used unchanged.
    """

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # Python 2 only: urlh.geturl() can occasionally hand back the
            # redirect URL as a byte string rather than unicode. Passing the
            # target through compat_str before delegating to the parent
            # handler forces it to always be unicode.
            parent = compat_urllib_request.HTTPRedirectHandler
            text_url = compat_str(newurl)
            return parent.redirect_request(
                self, req, fp, code, msg, headers, text_url)
def extract_timezone(date_str): def extract_timezone(date_str):
m = re.search( m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.02.16' __version__ = '2020.03.06'