Merge branch 'master' of https://github.com/rg3/youtube-dl

2025-02-18 18:17:55 +01:00 · 2017-11-18 19:33:21 +00:00 · 2017-11-18 19:33:21 +00:00 · b3b8237cfd
commit b3b8237cfd
parent 2b206c30f1 f610dbb05f
9 changed files with 111 additions and 31 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.11.06
+[debug] youtube-dl version 2017.11.15
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/20
+++ b/20
@ -1,7 +1,25 @@
-version <unreleased>
+version 2017.11.15
+
+Core
+* [common] Skip Apple FairPlay m3u8 manifests (#14741)
+* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)

 Extractors
+* [vshare] Capture and output error message
+* [vshare] Fix extraction (#14473)
+* [crunchyroll] Extract old RTMP formats
+* [tva] Fix extraction (#14736)
+* [gamespot] Lower preference of HTTP formats (#14652)
+* [instagram:user] Fix extraction (#14699)
+* [ccma] Fix typo (#14730)
+- Remove sensitive data from logging in messages
+* [instagram:user] Fix extraction (#14699)
+ [gamespot] Add support for article URLs (#14652)
+* [gamespot] Skip Brightcove Once HTTP formats (#14652)
+* [cartoonnetwork] Update tokenizer_src (#14666)
 + [wsj] Recognize another URL pattern (#14704)
+* [pandatv] Update API URL and sign format URLs (#14693)
+* [crunchyroll] Use old login method (#11572)


 version 2017.11.06
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -592,19 +592,11 @@ class InfoExtractor(object):
        if not encoding:
            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
        if self._downloader.params.get('dump_intermediate_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            self.to_screen('Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + urlh.geturl())
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        if self._downloader.params.get('write_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            basen = '%s_%s' % (video_id, url)
+            basen = '%s_%s' % (video_id, urlh.geturl())
            if len(basen) > 240:
                h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                basen = basen[:240 - len(h)] + h
@ -1356,6 +1348,9 @@ class InfoExtractor(object):
        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
            return []

+        if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
+            return []
+
        formats = []

        format_url = lambda u: (
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@ -10,7 +10,7 @@ from ..utils import (


 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
    _TESTS = [{
        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor):
    }, {
        'url': 'http://www.drtuber.com/embed/489939',
        'only_matching': True,
+    }, {
+        'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+        'only_matching': True,
    }]

    @staticmethod
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -102,6 +102,7 @@ from .joj import JojIE
 from .megaphone import MegaphoneIE
 from .vzaar import VzaarIE
 from .channel9 import Channel9IE
+from .vshare import VShareIE


 class GenericIE(InfoExtractor):
@ -1921,6 +1922,16 @@ class GenericIE(InfoExtractor):
                'title': 'Rescue Kit 14 Free Edition - Getting started',
            },
            'playlist_count': 4,
+        },
+        {
+            # vshare embed
+            'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+            'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+            'info_dict': {
+                'id': '0f64ce6',
+                'title': 'vl14062007715967',
+                'ext': 'mp4',
+            }
        }
        # {
        #     # TODO: find another test
@ -2879,6 +2890,11 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())

+        vshare_urls = VShareIE._extract_urls(webpage)
+        if vshare_urls:
+            return self.playlist_from_matches(
+                vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
        def merge_dicts(dict1, dict2):
            merged = {}
            for k, v in dict1.items():
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor):
                video_url = media.get('video_url')
                height = int_or_none(media.get('dimensions', {}).get('height'))
                width = int_or_none(media.get('dimensions', {}).get('width'))
-                description = media.get('caption')
+                description = try_get(
+                    media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+                    compat_str) or media.get('caption')
                thumbnail = media.get('display_src')
-                timestamp = int_or_none(media.get('date'))
+                timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
                uploader = media.get('owner', {}).get('full_name')
                uploader_id = media.get('owner', {}).get('username')
-                like_count = int_or_none(media.get('likes', {}).get('count'))
-                comment_count = int_or_none(media.get('comments', {}).get('count'))
+
+                def get_count(key, kind):
+                    return int_or_none(try_get(
+                        media, (lambda x: x['edge_media_%s' % key]['count'],
+                                lambda x: x['%ss' % kind]['count'])))
+                like_count = get_count('preview_like', 'like')
+                comment_count = get_count('to_comment', 'comment')
+
                comments = [{
                    'author': comment.get('user', {}).get('username'),
                    'author_id': comment.get('user', {}).get('id'),
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@ -7,7 +7,7 @@ from ..utils import ExtractorError


 class SpankBangIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
    _TESTS = [{
        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
        'md5': '1cc433e1d6aa14bc376535b8679302f7',
@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor):
            'id': '3vvn',
            'ext': 'mp4',
            'title': 'fantasy solo',
-            'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
+            'description': 'Watch fantasy solo free HD porn video - 05 minutes -  Babe,Masturbation,Solo,Toy  - dillion harper masturbates on a bed free adult movies sexy clips.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
            'age_limit': 18,
@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor):
        # no uploader
        'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
        'only_matching': True,
+    }, {
+        # mobile page
+        'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/vshare.py
+++ b/youtube_dl/extractor/vshare.py
@ -1,14 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+)


 class VShareIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
-        'md5': '16d7b8fef58846db47419199ff1ab3e7',
+        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
@ -19,20 +26,49 @@ class VShareIE(InfoExtractor):
        'only_matching': True,
    }]

+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
+            webpage)
+
+    def _extract_packed(self, webpage):
+        packed = self._search_regex(
+            r'(eval\(function.+)', webpage, 'packed code')
+        unpacked = decode_packed_codes(packed)
+        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
+        digits = [int(digit) for digit in digits.split(',')]
+        key_digit = self._search_regex(
+            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
+        chars = [compat_chr(d - int(key_digit)) for d in digits]
+        return ''.join(chars)
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
-            'https://vshare.io/d/%s' % video_id, video_id)
+            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
+            video_id)

        title = self._html_search_regex(
-            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
-        video_url = self._search_regex(
-            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
-            webpage, 'video url', group='url')
+            r'<title>([^<]+)</title>', webpage, 'title')
+        title = title.split(' - ')[0]

-        return {
+        error = self._html_search_regex(
+            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
+            'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+        info = self._parse_html5_media_entries(
+            url, '<video>%s</video>' % self._extract_packed(webpage),
+            video_id)[0]
+
+        self._sort_formats(info['formats'])
+
+        info.update({
            'id': video_id,
            'title': title,
-            'url': video_url,
-        }
+        })
+
+        return info
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.11.06'
+__version__ = '2017.11.15'