From 0987f2ddb27a27506c697ad9dae2ccbf24fc786d Mon Sep 17 00:00:00 2001 From: Timendum Date: Tue, 14 Nov 2017 16:34:45 +0100 Subject: [PATCH 01/10] [vshare] Fix extraction (closes #14473) --- youtube_dl/extractor/generic.py | 16 +++++++++++++++ youtube_dl/extractor/vshare.py | 36 +++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a9c3e2de..31564e550 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -102,6 +102,7 @@ from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE from .channel9 import Channel9IE +from .vshare import VShareIE class GenericIE(InfoExtractor): @@ -1921,6 +1922,16 @@ class GenericIE(InfoExtractor): 'title': 'Rescue Kit 14 Free Edition - Getting started', }, 'playlist_count': 4, + }, + { + # vshare embed + 'url': 'https://youtube-dl-demo.neocities.org/vshare.html', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', + 'info_dict': { + 'id': '0f64ce6', + 'title': 'vl14062007715967', + 'ext': 'mp4', + } } # { # # TODO: find another test @@ -2879,6 +2890,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + vshare_urls = VShareIE._extract_urls(webpage) + if vshare_urls: + return self.playlist_from_matches( + vshare_urls, video_id, video_title, ie=VShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 5addbc280..ea39a9051 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -1,14 +1,18 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import decode_packed_codes class VShareIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://vshare.io/d/0f64ce6', - 'md5': '16d7b8fef58846db47419199ff1ab3e7', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', 'info_dict': { 'id': '0f64ce6', 'title': 'vl14062007715967', @@ -19,20 +23,36 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + def _extract_packed(self, webpage): + packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + unpacked = decode_packed_codes(packed) + digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') + digits = digits.split(',') + digits = [int(digit) for digit in digits] + key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + chars = [compat_chr(d - int(key_digit)) for d in digits] + return ''.join(chars) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/d/%s' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) - title = self._html_search_regex( - r'(?s)
(.+?)
', webpage, 'title') - video_url = self._search_regex( - r']+href=(["\'])(?P(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here', - webpage, 'video url', group='url') + title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = title.split(' - ')[0] + unpacked = self._extract_packed(webpage) + video_urls = re.findall(r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) From ff31f2d5c3750364b013a9bf59b85cebd0cee1fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 22:39:54 +0700 Subject: [PATCH 02/10] [vshare] Capture and output error message --- youtube_dl/extractor/vshare.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index ea39a9051..20ce22e16 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from ..compat import compat_chr -from ..utils import decode_packed_codes +from ..utils import ( + decode_packed_codes, + ExtractorError, +) class VShareIE(InfoExtractor): @@ -42,6 +45,12 @@ class VShareIE(InfoExtractor): title = self._html_search_regex(r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] + error = self._html_search_regex( + r'(?s)]+\bclass=["\']xxx-error[^>]+>(.+?) Date: Tue, 14 Nov 2017 22:49:25 +0700 Subject: [PATCH 03/10] [vshare] Improve extraction, fix formats sorting and carry long lines --- youtube_dl/extractor/vshare.py | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 20ce22e16..e4ec77889 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -26,13 +26,20 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) + def _extract_packed(self, webpage): - packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + packed = self._search_regex( + r'(eval\(function.+)', webpage, 'packed code') unpacked = decode_packed_codes(packed) digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') - digits = digits.split(',') - digits = [int(digit) for digit in digits] - key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + digits = [int(digit) for digit in digits.split(',')] + key_digit = self._search_regex( + r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') chars = [compat_chr(d - int(key_digit)) for d in digits] return ''.join(chars) @@ -40,9 +47,11 @@ class VShareIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, + video_id) - title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = self._html_search_regex( + r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] error = self._html_search_regex( @@ -51,17 +60,15 @@ class VShareIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) - unpacked = self._extract_packed(webpage) - video_urls = re.findall(r'%s' % self._extract_packed(webpage), + video_id)[0] + + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'formats': formats, - } + }) - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', - webpage) + return info From ea2295842f79c9efff3a9abce1d0eee7de4953d6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 14 Nov 2017 17:41:30 +0100 Subject: [PATCH 04/10] [common] skip Apple FairPlay m3u8 manifests(closes #14741) --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e2d9f52b0..a9d68fc0c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1356,6 +1356,9 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return [] + if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay + return [] + formats = [] format_url = lambda u: ( From fae0eb42ec4309fe7fb8476d30621ba1d60fa168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 23:59:30 +0700 Subject: [PATCH 05/10] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index cedab4723..6ed0f011f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,25 @@ version +Core +* [common] Skip Apple FairPlay m3u8 manifests (#14741) +* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) + Extractors +* [vshare] Capture and output error message +* [vshare] Fix extraction (#14473) +* [crunchyroll] Extract old RTMP formats +* [tva] Fix extraction (#14736) +* [gamespot] Lower preference of HTTP formats (#14652) +* [instagram:user] Fix extraction (#14699) +* [ccma] Fix typo (#14730) +- Remove sensitive data from logging in messages +* [instagram:user] Fix extraction (#14699) ++ [gamespot] Add support for article URLs (#14652) +* [gamespot] Skip Brightcove Once HTTP formats (#14652) +* [cartoonnetwork] Update tokenizer_src (#14666) + [wsj] Recognize another URL pattern (#14704) +* [pandatv] Update API URL and sign format URLs (#14693) +* [crunchyroll] Use old login method (#11572) version 2017.11.06 From 08e45b39e76419f63aa43d5008257789d8a30bf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 00:15:42 +0700 Subject: [PATCH 06/10] release 2017.11.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index be6e6ddab..4dd1a6e59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.11.06 +[debug] youtube-dl version 2017.11.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6ed0f011f..6b4befb8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.11.15 Core * [common] Skip Apple FairPlay m3u8 manifests (#14741) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b67d23fe..1c3cbefeb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.06' +__version__ = '2017.11.15' From 9cbd4dda10ad248a5268ec1e0e563cf97024a8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 22:14:54 +0700 Subject: [PATCH 07/10] [instagram] Fix description, timestamp and counters extraction (closes #14755) --- youtube_dl/extractor/instagram.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 20db31f86..a77f619d2 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor): video_url = media.get('video_url') height = int_or_none(media.get('dimensions', {}).get('height')) width = int_or_none(media.get('dimensions', {}).get('width')) - description = media.get('caption') + description = try_get( + media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], + compat_str) or media.get('caption') thumbnail = media.get('display_src') - timestamp = int_or_none(media.get('date')) + timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username') - like_count = int_or_none(media.get('likes', {}).get('count')) - comment_count = int_or_none(media.get('comments', {}).get('count')) + + def get_count(key, kind): + return int_or_none(try_get( + media, (lambda x: x['edge_media_%s' % key]['count'], + lambda x: x['%ss' % kind]['count']))) + like_count = get_count('preview_like', 'like') + comment_count = get_count('to_comment', 'comment') + comments = [{ 'author': comment.get('user', {}).get('username'), 'author_id': comment.get('user', {}).get('id'), From 3192d4bc7a063983c3a82bc4320c16d65679307a Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:05:04 +0200 Subject: [PATCH 08/10] [spankbang] Add support for mobile URLs and fix test --- youtube_dl/extractor/spankbang.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2863e53b5..e6c2dcfc4 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor): # no uploader 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', 'only_matching': True, + }, { + # mobile page + 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', + 'only_matching': True, }] def _real_extract(self, url): From 38db52adf35c2134444e5b6b601e9567797e9195 Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:50:07 +0200 Subject: [PATCH 09/10] [drtuber] Add support for mobile URLs --- youtube_dl/extractor/drtuber.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index c5d56a9ad..c88b3126b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,7 +10,7 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' _TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', @@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor): }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, + }, { + 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', + 'only_matching': True, }] @staticmethod From f610dbb05f8d17cc95437958835a437c3777b38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Nov 2017 19:02:56 +0700 Subject: [PATCH 10/10] [extractor/common] Use final URL when dumping request (closes #14769) --- youtube_dl/extractor/common.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9d68fc0c..8e4ee0deb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -592,19 +592,11 @@ class InfoExtractor(object): if not encoding: encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - self.to_screen('Dumping request to ' + url) + self.to_screen('Dumping request to ' + urlh.geturl()) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self._downloader.params.get('write_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - basen = '%s_%s' % (video_id, url) + basen = '%s_%s' % (video_id, urlh.geturl()) if len(basen) > 240: h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h