Merge remote-tracking branch 'upstream/master' into fix-zing-mp3

2024-12-03 05:47:55 +01:00 · 2018-10-20 22:40:46 -04:00 · 2018-10-20 22:40:46 -04:00 · db4c9e5cc4
commit db4c9e5cc4
parent 563b863d7d b99b0bcfa0
14 changed files with 189 additions and 125 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.09.26
+[debug] youtube-dl version 2018.10.05
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
 version 2018.10.05
 Extractors
 * [pluralsight] Improve authentication (#17762)
 * [dailymotion] Fix extraction (#17699)
 * [crunchyroll] Switch to HTTPS for RpcApi (#17749)
 + [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
 * [philharmoniedeparis] Fix extraction (#17705)
 + [jamendo] Add support for licensing.jamendo.com (#17724)
 + [openload] Add support for oload.cloud (#17710)
 * [pluralsight] Fix subtitles extraction (#17726, #17728)
 + [vimeo] Add another config regular expression (#17690)
 * [spike] Fix Paramount Network extraction (#17677)
 * [hotstar] Fix extraction (#14694, #14931, #17637)
 version 2018.09.26
 Extractors
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -360,7 +360,7 @@
 - **HitRecord**
 - **HornBunny**
 - **HotNewHipHop**
- - **HotStar**
+ - **hotstar**
 - **hotstar:playlist**
 - **Howcast**
 - **HowStuffWorks**
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,8 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import re
+import base64
 import json
 import re
 import struct
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)
    def _brightcove_new_url_result(self, publisher_id, video_id):
        """Return a URL result pointing at the new-style Brightcove player.

        The player page URL is built from ``publisher_id`` and ``video_id``
        (using the ``default_default`` player path) and passed to
        ``self.url_result`` together with ``BrightcoveNewIE``'s extractor
        key and the video id.
        """
        return self.url_result(
            'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
            BrightcoveNewIE.ie_key(),
            video_id)
    def _get_video_info(self, video_id, query, referer=None):
        headers = {}
        linkBase = query.get('linkBaseURL')
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
            'error message', default=None)
        if error_msg is not None:
            publisher_id = query.get('publisherId')
            if publisher_id and publisher_id[0].isdigit():
                publisher_id = publisher_id[0]
            if not publisher_id:
                player_key = query.get('playerKey')
                if player_key and ',' in player_key[0]:
                    player_key = player_key[0]
                else:
                    player_id = query.get('playerID')
                    if player_id and player_id[0].isdigit():
                        player_page = self._download_webpage(
                            'http://link.brightcove.com/services/player/bcpid' + player_id[0],
                            video_id, headers=headers, fatal=False)
                        if player_page:
                            player_key = self._search_regex(
                                r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
                                player_page, 'player key', fatal=False)
                if player_key:
                    enc_pub_id = player_key.split(',')[1].replace('~', '=')
                    publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
                if publisher_id:
                    return self._brightcove_new_url_result(publisher_id, video_id)
            raise ExtractorError(
                'brightcove said: %s' % error_msg, expected=True)
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
                else:
                    return ad_info
-        if 'url' not in info and not info.get('formats'):
+        if not info.get('url') and not info.get('formats'):
-            raise ExtractorError('Unable to extract video url for %s' % video_id)
+            uploader_id = info.get('uploader_id')
            if uploader_id:
                info.update(self._brightcove_new_url_result(uploader_id, video_id))
            else:
                raise ExtractorError('Unable to extract video url for %s' % video_id)
        return info
--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dl/extractor/cwtv.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_age_limit,
    parse_iso8601,
@@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        video_data = self._download_json(
+        data = self._download_json(
            'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
-            video_id)['video']
+            video_id)
        if data.get('result') != 'ok':
            raise ExtractorError(data['msg'], expected=True)
        video_data = data['video']
        title = video_data['title']
        mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -22,7 +22,10 @@ from ..utils import (
    parse_iso8601,
    sanitized_Request,
    str_to_int,
    try_get,
    unescapeHTML,
    update_url_query,
    url_or_none,
    urlencode_postdata,
 )
@@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
            webpage, 'player v5', default=None)
        if player_v5:
-            player = self._parse_json(player_v5, video_id)
+            player = self._parse_json(player_v5, video_id, fatal=False) or {}
-            metadata = player['metadata']
+            metadata = try_get(player, lambda x: x['metadata'], dict)
            if not metadata:
                metadata_url = url_or_none(try_get(
                    player, lambda x: x['context']['metadata_template_url1']))
                if metadata_url:
                    metadata_url = metadata_url.replace(':videoId', video_id)
                else:
                    metadata_url = update_url_query(
                        'https://www.dailymotion.com/player/metadata/video/%s'
                        % video_id, {
                            'embedder': url,
                            'integration': 'inline',
                            'GK_PV5_NEON': '1',
                        })
                metadata = self._download_json(
                    metadata_url, video_id, 'Downloading metadata JSON')
-            if metadata.get('error', {}).get('type') == 'password_protected':
+            if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
                password = self._downloader.params.get('videopassword')
                if password:
                    r = int(metadata['id'][1:], 36)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1153,7 +1153,6 @@ from .tv2 import (
    TV2ArticleIE,
 )
 from .tv2hu import TV2HuIE
-from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
 from .tva import TVAIE
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor):
        # Its title has not got its extension but url has it
        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
        'only_matching': True,
    }, {
        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
        'only_matching': True,
    }]
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -2,52 +2,63 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
    clean_html,
    determine_ext,
    int_or_none,
    parse_iso8601,
 )
 class PatreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
-    _TESTS = [
+    _TESTS = [{
-        {
+        'url': 'http://www.patreon.com/creation?hid=743933',
-            'url': 'http://www.patreon.com/creation?hid=743933',
+        'md5': 'e25505eec1053a6e6813b8ed369875cc',
-            'md5': 'e25505eec1053a6e6813b8ed369875cc',
+        'info_dict': {
-            'info_dict': {
+            'id': '743933',
-                'id': '743933',
+            'ext': 'mp3',
-                'ext': 'mp3',
+            'title': 'Episode 166: David Smalley of Dogma Debate',
-                'title': 'Episode 166: David Smalley of Dogma Debate',
+            'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
-                'uploader': 'Cognitive Dissonance Podcast',
+            'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
+            'thumbnail': 're:^https?://.*$',
-            },
+            'timestamp': 1406473987,
            'upload_date': '20140727',
        },
-        {
+    }, {
-            'url': 'http://www.patreon.com/creation?hid=754133',
+        'url': 'http://www.patreon.com/creation?hid=754133',
-            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+        'md5': '3eb09345bf44bf60451b8b0b81759d0a',
-            'info_dict': {
+        'info_dict': {
-                'id': '754133',
+            'id': '754133',
-                'ext': 'mp3',
+            'ext': 'mp3',
-                'title': 'CD 167 Extra',
+            'title': 'CD 167 Extra',
-                'uploader': 'Cognitive Dissonance Podcast',
+            'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
+            'thumbnail': 're:^https?://.*$',
            },
        },
-        {
+        'skip': 'Patron-only content',
-            'url': 'https://www.patreon.com/creation?hid=1682498',
+    }, {
-            'info_dict': {
+        'url': 'https://www.patreon.com/creation?hid=1682498',
-                'id': 'SU4fj_aEMVw',
+        'info_dict': {
-                'ext': 'mp4',
+            'id': 'SU4fj_aEMVw',
-                'title': 'I\'m on Patreon!',
+            'ext': 'mp4',
-                'uploader': 'TraciJHines',
+            'title': 'I\'m on Patreon!',
-                'thumbnail': 're:^https?://.*$',
+            'uploader': 'TraciJHines',
-                'upload_date': '20150211',
+            'thumbnail': 're:^https?://.*$',
-                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+            'upload_date': '20150211',
-                'uploader_id': 'TraciJHines',
+            'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
-            },
+            'uploader_id': 'TraciJHines',
-            'params': {
+        },
-                'noplaylist': True,
+        'params': {
-                'skip_download': True,
+            'noplaylist': True,
-            }
+            'skip_download': True,
        }
-    ]
+    }, {
        'url': 'https://www.patreon.com/posts/episode-166-of-743933',
        'only_matching': True,
    }, {
        'url': 'https://www.patreon.com/posts/743933',
        'only_matching': True,
    }]
    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
@@ -78,38 +89,48 @@ class PatreonIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        post = self._download_json(
-        title = self._og_search_title(webpage).strip()
+            'https://www.patreon.com/api/posts/' + video_id, video_id)
-
+        attributes = post['data']['attributes']
-        attach_fn = self._html_search_regex(
+        title = attributes['title'].strip()
-            r'<div class="attach"><a target="_blank" href="([^"]+)">',
+        image = attributes.get('image') or {}
-            webpage, 'attachment URL', default=None)
+        info = {
        embed = self._html_search_regex(
            r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
            webpage, 'embedded URL', default=None)
        if attach_fn is not None:
            video_url = 'http://www.patreon.com' + attach_fn
            thumbnail = self._og_search_thumbnail(webpage)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
        elif embed is not None:
            return self.url_result(embed)
        else:
            playlist = self._parse_json(self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                webpage, 'playlist JSON'),
                video_id, transform_source=js_to_json)
            data = playlist[0]
            video_url = self._proto_relative_url(data['mp3'])
            thumbnail = self._proto_relative_url(data.get('cover'))
            uploader = data.get('artist')
        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'title': title,
-            'uploader': uploader,
+            'description': clean_html(attributes.get('content')),
-            'thumbnail': thumbnail,
+            'thumbnail': image.get('large_url') or image.get('url'),
            'timestamp': parse_iso8601(attributes.get('published_at')),
            'like_count': int_or_none(attributes.get('like_count')),
            'comment_count': int_or_none(attributes.get('comment_count')),
        }
        def add_file(file_data):
            file_url = file_data.get('url')
            if file_url:
                info.update({
                    'url': file_url,
                    'ext': determine_ext(file_data.get('name'), 'mp3'),
                })
        for i in post.get('included', []):
            i_type = i.get('type')
            if i_type == 'attachment':
                add_file(i.get('attributes') or {})
            elif i_type == 'user':
                user_attributes = i.get('attributes')
                if user_attributes:
                    info.update({
                        'uploader': user_attributes.get('full_name'),
                        'uploader_url': user_attributes.get('url'),
                    })
        if not info.get('url'):
            add_file(attributes.get('post_file') or {})
        if not info.get('url'):
            info.update({
                '_type': 'url',
                'url': attributes['embed']['url'],
            })
        return info
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -4,6 +4,7 @@ import collections
 import json
 import os
 import random
 import re
 from .common import InfoExtractor
 from ..compat import (
@@ -196,7 +197,10 @@ query viewClip {
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
-        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+        if all(not re.search(p, response) for p in (
                r'__INITIAL_STATE__', r'["\']currentUser["\']',
                # new layout?
                r'>\s*Sign out\s*<')):
            BLOCKED = 'Your account has been blocked due to suspicious activity'
            if BLOCKED in response:
                raise ExtractorError(
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE):
        options = self._download_json(
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
-            video_id, 'Downloading options JSON')
+            video_id, 'Downloading options JSON',
            headers=self.geo_verification_headers())
        formats = []
        for format_id, format_url in options['video_balancer'].items():
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -212,8 +212,6 @@ class TEDIE(InfoExtractor):
        http_url = None
        for format_id, resources in resources_.items():
            if not isinstance(resources, dict):
                continue
            if format_id == 'h264':
                for resource in resources:
                    h264_url = resource.get('file')
@@ -242,6 +240,8 @@ class TEDIE(InfoExtractor):
                        'tbr': int_or_none(resource.get('bitrate')),
                    })
            elif format_id == 'hls':
                if not isinstance(resources, dict):
                    continue
                stream_url = url_or_none(resources.get('stream'))
                if not stream_url:
                    continue
--- a/youtube_dl/extractor/tv3.py
+++ b/youtube_dl/extractor/tv3.py
@@ -1,34 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class TV3IE(InfoExtractor):
    """Extractor for tv3.co.nz article pages that embed a Brightcove player.

    The article page is downloaded only to find the numeric Brightcove
    video id; the returned result points at the Brightcove player URL.
    """
    # Matches tv3.co.nz article URLs; the first path segment is captured as
    # ``id`` and is used as the display id in ``_real_extract``.
    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
    # Single test fixture; note that the expected ``id`` is the Brightcove
    # video id, not the slug taken from the article URL.
    _TEST = {
        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
        'info_dict': {
            'id': '4659127992001',
            'ext': 'mp4',
            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
            'uploader_id': '3812193411001',
            'upload_date': '20151213',
            'timestamp': 1449975272,
        },
        'expected_warnings': [
            'Failed to download MPD manifest'
        ],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    # Brightcove player page with a fixed account id (the same value as the
    # fixture's ``uploader_id``); ``%s`` is filled with the video id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
    def _real_extract(self, url):
        """Find the Brightcove video id in the article page and return a URL result for it."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The id is read from the ``@videoPlayer`` <param> of the embedded player markup.
        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
        # Return a URL result for the player page, tagged with the 'BrightcoveNew' extractor key.
        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2018.09.26'
+__version__ = '2018.10.05'
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2018.09.26'`	`__version__ = '2018.10.05'`