Merge remote-tracking branch 'upstream/master' into fix-zing-mp3

2024-12-02 13:27:56 +01:00 · 2018-10-20 22:40:46 -04:00 · 2018-10-20 22:40:46 -04:00 · db4c9e5cc4
commit db4c9e5cc4
parent 563b863d7d b99b0bcfa0
14 changed files with 189 additions and 125 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.09.26
+[debug] youtube-dl version 2018.10.05
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+version 2018.10.05
+
+Extractors
+* [pluralsight] Improve authentication (#17762)
+* [dailymotion] Fix extraction (#17699)
+* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
+* [philharmoniedeparis] Fix extraction (#17705)
+ [jamendo] Add support for licensing.jamendo.com (#17724)
+ [openload] Add support for oload.cloud (#17710)
+* [pluralsight] Fix subtitles extraction (#17726, #17728)
+ [vimeo] Add another config regular expression (#17690)
+* [spike] Fix Paramount Network extraction (#17677)
+* [hotstar] Fix extraction (#14694, #14931, #17637)
+
+
 version 2018.09.26

 Extractors
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -360,7 +360,7 @@
 - **HitRecord**
 - **HornBunny**
 - **HotNewHipHop**
- - **HotStar**
+ - **hotstar**
 - **hotstar:playlist**
 - **Howcast**
 - **HowStuffWorks**
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,8 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
+import base64
 import json
+import re
+import struct

 from .common import InfoExtractor
 from .adobepass import AdobePassIE
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)

+    def _brightcove_new_url_result(self, publisher_id, video_id):
+        brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+        return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+
    def _get_video_info(self, video_id, query, referer=None):
        headers = {}
        linkBase = query.get('linkBaseURL')
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
            'error message', default=None)
        if error_msg is not None:
+            publisher_id = query.get('publisherId')
+            if publisher_id and publisher_id[0].isdigit():
+                publisher_id = publisher_id[0]
+            if not publisher_id:
+                player_key = query.get('playerKey')
+                if player_key and ',' in player_key[0]:
+                    player_key = player_key[0]
+                else:
+                    player_id = query.get('playerID')
+                    if player_id and player_id[0].isdigit():
+                        player_page = self._download_webpage(
+                            'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+                            video_id, headers=headers, fatal=False)
+                        if player_page:
+                            player_key = self._search_regex(
+                                r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+                                player_page, 'player key', fatal=False)
+                if player_key:
+                    enc_pub_id = player_key.split(',')[1].replace('~', '=')
+                    publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+                if publisher_id:
+                    return self._brightcove_new_url_result(publisher_id, video_id)
            raise ExtractorError(
                'brightcove said: %s' % error_msg, expected=True)

@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
                else:
                    return ad_info

-        if 'url' not in info and not info.get('formats'):
-            raise ExtractorError('Unable to extract video url for %s' % video_id)
+        if not info.get('url') and not info.get('formats'):
+            uploader_id = info.get('uploader_id')
+            if uploader_id:
+                info.update(self._brightcove_new_url_result(uploader_id, video_id))
+            else:
+                raise ExtractorError('Unable to extract video url for %s' % video_id)
        return info


--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dl/extractor/cwtv.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    int_or_none,
    parse_age_limit,
    parse_iso8601,
@@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        video_data = self._download_json(
+        data = self._download_json(
            'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
-            video_id)['video']
+            video_id)
+        if data.get('result') != 'ok':
+            raise ExtractorError(data['msg'], expected=True)
+        video_data = data['video']
        title = video_data['title']
        mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id

--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -22,7 +22,10 @@ from ..utils import (
    parse_iso8601,
    sanitized_Request,
    str_to_int,
+    try_get,
    unescapeHTML,
+    update_url_query,
+    url_or_none,
    urlencode_postdata,
 )

@@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
            webpage, 'player v5', default=None)
        if player_v5:
-            player = self._parse_json(player_v5, video_id)
-            metadata = player['metadata']
+            player = self._parse_json(player_v5, video_id, fatal=False) or {}
+            metadata = try_get(player, lambda x: x['metadata'], dict)
+            if not metadata:
+                metadata_url = url_or_none(try_get(
+                    player, lambda x: x['context']['metadata_template_url1']))
+                if metadata_url:
+                    metadata_url = metadata_url.replace(':videoId', video_id)
+                else:
+                    metadata_url = update_url_query(
+                        'https://www.dailymotion.com/player/metadata/video/%s'
+                        % video_id, {
+                            'embedder': url,
+                            'integration': 'inline',
+                            'GK_PV5_NEON': '1',
+                        })
+                metadata = self._download_json(
+                    metadata_url, video_id, 'Downloading metadata JSON')

-            if metadata.get('error', {}).get('type') == 'password_protected':
+            if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
                password = self._downloader.params.get('videopassword')
                if password:
                    r = int(metadata['id'][1:], 36)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1153,7 +1153,6 @@ from .tv2 import (
    TV2ArticleIE,
 )
 from .tv2hu import TV2HuIE
-from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
 from .tva import TVAIE
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):


 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor):
        # Its title has not got its extension but url has it
        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
        'only_matching': True,
+    }, {
+        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
+        'only_matching': True,
    }]

    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -2,52 +2,63 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+)


 class PatreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.patreon.com/creation?hid=743933',
-            'md5': 'e25505eec1053a6e6813b8ed369875cc',
-            'info_dict': {
-                'id': '743933',
-                'ext': 'mp3',
-                'title': 'Episode 166: David Smalley of Dogma Debate',
-                'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
-            },
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.patreon.com/creation?hid=743933',
+        'md5': 'e25505eec1053a6e6813b8ed369875cc',
+        'info_dict': {
+            'id': '743933',
+            'ext': 'mp3',
+            'title': 'Episode 166: David Smalley of Dogma Debate',
+            'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+            'uploader': 'Cognitive Dissonance Podcast',
+            'thumbnail': 're:^https?://.*$',
+            'timestamp': 1406473987,
+            'upload_date': '20140727',
        },
-        {
-            'url': 'http://www.patreon.com/creation?hid=754133',
-            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
-            'info_dict': {
-                'id': '754133',
-                'ext': 'mp3',
-                'title': 'CD 167 Extra',
-                'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
-            },
+    }, {
+        'url': 'http://www.patreon.com/creation?hid=754133',
+        'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+        'info_dict': {
+            'id': '754133',
+            'ext': 'mp3',
+            'title': 'CD 167 Extra',
+            'uploader': 'Cognitive Dissonance Podcast',
+            'thumbnail': 're:^https?://.*$',
        },
-        {
-            'url': 'https://www.patreon.com/creation?hid=1682498',
-            'info_dict': {
-                'id': 'SU4fj_aEMVw',
-                'ext': 'mp4',
-                'title': 'I\'m on Patreon!',
-                'uploader': 'TraciJHines',
-                'thumbnail': 're:^https?://.*$',
-                'upload_date': '20150211',
-                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
-                'uploader_id': 'TraciJHines',
-            },
-            'params': {
-                'noplaylist': True,
-                'skip_download': True,
-            }
+        'skip': 'Patron-only content',
+    }, {
+        'url': 'https://www.patreon.com/creation?hid=1682498',
+        'info_dict': {
+            'id': 'SU4fj_aEMVw',
+            'ext': 'mp4',
+            'title': 'I\'m on Patreon!',
+            'uploader': 'TraciJHines',
+            'thumbnail': 're:^https?://.*$',
+            'upload_date': '20150211',
+            'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+            'uploader_id': 'TraciJHines',
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
        }
-    ]
+    }, {
+        'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.patreon.com/posts/743933',
+        'only_matching': True,
+    }]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
@@ -78,38 +89,48 @@ class PatreonIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage).strip()
-
-        attach_fn = self._html_search_regex(
-            r'<div class="attach"><a target="_blank" href="([^"]+)">',
-            webpage, 'attachment URL', default=None)
-        embed = self._html_search_regex(
-            r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
-            webpage, 'embedded URL', default=None)
-
-        if attach_fn is not None:
-            video_url = 'http://www.patreon.com' + attach_fn
-            thumbnail = self._og_search_thumbnail(webpage)
-            uploader = self._html_search_regex(
-                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
-        elif embed is not None:
-            return self.url_result(embed)
-        else:
-            playlist = self._parse_json(self._search_regex(
-                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
-                webpage, 'playlist JSON'),
-                video_id, transform_source=js_to_json)
-            data = playlist[0]
-            video_url = self._proto_relative_url(data['mp3'])
-            thumbnail = self._proto_relative_url(data.get('cover'))
-            uploader = data.get('artist')
-
-        return {
+        post = self._download_json(
+            'https://www.patreon.com/api/posts/' + video_id, video_id)
+        attributes = post['data']['attributes']
+        title = attributes['title'].strip()
+        image = attributes.get('image') or {}
+        info = {
            'id': video_id,
-            'url': video_url,
-            'ext': 'mp3',
            'title': title,
-            'uploader': uploader,
-            'thumbnail': thumbnail,
+            'description': clean_html(attributes.get('content')),
+            'thumbnail': image.get('large_url') or image.get('url'),
+            'timestamp': parse_iso8601(attributes.get('published_at')),
+            'like_count': int_or_none(attributes.get('like_count')),
+            'comment_count': int_or_none(attributes.get('comment_count')),
        }
+
+        def add_file(file_data):
+            file_url = file_data.get('url')
+            if file_url:
+                info.update({
+                    'url': file_url,
+                    'ext': determine_ext(file_data.get('name'), 'mp3'),
+                })
+
+        for i in post.get('included', []):
+            i_type = i.get('type')
+            if i_type == 'attachment':
+                add_file(i.get('attributes') or {})
+            elif i_type == 'user':
+                user_attributes = i.get('attributes')
+                if user_attributes:
+                    info.update({
+                        'uploader': user_attributes.get('full_name'),
+                        'uploader_url': user_attributes.get('url'),
+                    })
+
+        if not info.get('url'):
+            add_file(attributes.get('post_file') or {})
+
+        if not info.get('url'):
+            info.update({
+                '_type': 'url',
+                'url': attributes['embed']['url'],
+            })
+
+        return info
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -4,6 +4,7 @@ import collections
 import json
 import os
 import random
+import re

 from .common import InfoExtractor
 from ..compat import (
@@ -196,7 +197,10 @@ query viewClip {
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)

-        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+        if all(not re.search(p, response) for p in (
+                r'__INITIAL_STATE__', r'["\']currentUser["\']',
+                # new layout?
+                r'>\s*Sign out\s*<')):
            BLOCKED = 'Your account has been blocked due to suspicious activity'
            if BLOCKED in response:
                raise ExtractorError(
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE):

        options = self._download_json(
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
-            video_id, 'Downloading options JSON')
+            video_id, 'Downloading options JSON',
+            headers=self.geo_verification_headers())

        formats = []
        for format_id, format_url in options['video_balancer'].items():
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -212,8 +212,6 @@ class TEDIE(InfoExtractor):

        http_url = None
        for format_id, resources in resources_.items():
-            if not isinstance(resources, dict):
-                continue
            if format_id == 'h264':
                for resource in resources:
                    h264_url = resource.get('file')
@@ -242,6 +240,8 @@ class TEDIE(InfoExtractor):
                        'tbr': int_or_none(resource.get('bitrate')),
                    })
            elif format_id == 'hls':
+                if not isinstance(resources, dict):
+                    continue
                stream_url = url_or_none(resources.get('stream'))
                if not stream_url:
                    continue
--- a/youtube_dl/extractor/tv3.py
+++ b/youtube_dl/extractor/tv3.py
@@ -1,34 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class TV3IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
-    _TEST = {
-        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
-        'info_dict': {
-            'id': '4659127992001',
-            'ext': 'mp4',
-            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
-            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
-            'uploader_id': '3812193411001',
-            'upload_date': '20151213',
-            'timestamp': 1449975272,
-        },
-        'expected_warnings': [
-            'Failed to download MPD manifest'
-        ],
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.09.26'
+__version__ = '2018.10.05'