From 1cc47c667419e0eadc0a6989256ab7b276852adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Apr 2018 23:49:30 +0700 Subject: [PATCH 001/114] [utils] Fix match_str for boolean meta fields --- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a1fe6fdb2..253a7fe17 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1072,6 +1072,18 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertFalse(match_str( 'like_count > 100 & dislike_count %s)\s*(?P[a-z_]+) From 0ff51adae6feab7386874eddc0d61dbeaf063bf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Apr 2018 23:53:01 +0700 Subject: [PATCH 002/114] [twitch] Extract is_live according to status (closes #16259) --- youtube_dl/extractor/twitch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f736283e9..4c11fd3c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -168,6 +168,13 @@ class TwitchItemBaseIE(TwitchBaseIE): return self.playlist_result(entries, info['id'], info['title']) def _extract_info(self, info): + status = info.get('status') + if status == 'recording': + is_live = True + elif status == 'recorded': + is_live = False + else: + is_live = None return { 'id': info['_id'], 'title': info.get('title') or 'Untitled Broadcast', @@ -178,6 +185,7 @@ class TwitchItemBaseIE(TwitchBaseIE): 'uploader_id': info.get('channel', {}).get('name'), 'timestamp': parse_iso8601(info.get('recorded_at')), 'view_count': int_or_none(info.get('views')), + 'is_live': is_live, } def _real_extract(self, url): From 76030543cd5e2214c47aa82f03b3e2cec97e7bc1 Mon Sep 17 00:00:00 2001 From: Alexandre Macabies Date: Tue, 24 Apr 2018 19:49:30 +0200 Subject: [PATCH 003/114] [openload] Recognize IPv6 stream URLs (closes #16137) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 650f95656..d0bdd60b8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -340,7 +340,10 @@ class OpenloadIE(InfoExtractor): get_element_by_id('streamurj', webpage) or self._search_regex( (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage, + r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', + r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, 'stream URL')) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id From 5d0fe6d23e4407bee3caec33955d4cb410bebb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 00:56:16 +0700 Subject: [PATCH 004/114] Credit @Zopieux for #16250 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 6223212aa..880e0abee 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,3 +236,4 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +Alexandre Macabies From 95284bc281d8aa3b1d6863ccb536da9d4cf6433c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:01:06 +0700 Subject: [PATCH 005/114] Credit @TingPing for picarto (#15551) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 880e0abee..812051796 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,4 +236,5 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +TingPing Alexandre Macabies From ecb24f7c081b764dd669cb4b277d8c14e55b2a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:02:28 +0700 Subject: [PATCH 006/114] Credit @f2face for #16115 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 812051796..eaf96d79d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,5 +236,6 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +Surya Oktafendri TingPing Alexandre Macabies From e028d4f506562a1febf76277795305e296823ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:03:42 +0700 Subject: [PATCH 007/114] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 185fa1753..a731fde29 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version + +Core +* [utils] Fix match_str for boolean meta fields ++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251) +* [YoutubeDL] Fix typo in media extension compatibility checker (#16215) + +Extractors ++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246, + #16250) ++ [twitch] Extract is_live according to status (#16259) +* [pornflip] Relax URL regular expression (#16258) +- [etonline] Remove extractor (#16256) +* [breakcom] Fix extraction (#16254) ++ [youtube] Add ability to authenticate with cookies +* [youtube:feed] Implement lazy playlist extraction (#10184) ++ [svt] Add support for TV channel live streams (#15279, #15809) +* [ccma] Fix video extraction (#15931) +* [rentv] Fix extraction (#15227) ++ [nick] Add support for nickjr.nl (#16230) +* [extremetube] Fix metadata extraction ++ [keezmovies] Add support for generic embeds (#16134, #16154) +* [nexx] Extract new azure URLs (#16223) +* [cbssports] Fix extraction (#16217) +* [kaltura] Improve embeds detection (#16201) +* [instagram:user] Fix extraction (#16119) +* [cbs] Skip DRM asset types (#16104) + + version 2018.04.16 Extractors From b5802d69f511481a87d8604fa1577bca8370cab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:12:40 +0700 Subject: [PATCH 008/114] release 2018.04.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 - youtube_dl/version.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 69f996179..252fa0adf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.04.16 +[debug] youtube-dl version 2018.04.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a731fde29..4a3df67df 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.04.25 Core * [utils] Fix match_str for boolean meta fields diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 715d16cfe..a110f687b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -257,7 +257,6 @@ - **ESPN** - **ESPNArticle** - **EsriVideo** - - **ETOnline** - **Europa** - **EveryonesMixtape** - **ExpoTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5aefdd0a2..4e3cb39c6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.04.16' +__version__ = '2018.04.25' From d3711b00502d9104a3697aba5d210a25066ca756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 02:14:27 +0700 Subject: [PATCH 009/114] [devscripts/gh-pages/generate-download.py] Use program checksum from versions.json --- devscripts/gh-pages/generate-download.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index fcd7e1dff..a873d32ee 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -1,27 +1,22 @@ #!/usr/bin/env python3 from __future__ import unicode_literals -import hashlib -import urllib.request import json versions_info = json.load(open('update/versions.json')) version = versions_info['latest'] -URL = versions_info['versions'][version]['bin'][0] - -data = urllib.request.urlopen(URL).read() +version_dict = versions_info['versions'][version] # Read template page with open('download.html.in', 'r', encoding='utf-8') as tmplf: template = tmplf.read() -sha256sum = hashlib.sha256(data).hexdigest() template = template.replace('@PROGRAM_VERSION@', version) -template = template.replace('@PROGRAM_URL@', URL) -template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) -template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) -template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) -template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0]) -template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1]) +template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) +template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1]) +template = template.replace('@EXE_URL@', version_dict['exe'][0]) +template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) +template = template.replace('@TAR_URL@', version_dict['tar'][0]) +template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) with open('download.html', 'w', encoding='utf-8') as dlf: dlf.write(template) From c84eae4f66be8a22c14b852bdb01773bb3807239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Apr 2018 03:45:52 +0700 Subject: [PATCH 010/114] [funk:channel] Improve extraction (closes #16285) --- youtube_dl/extractor/funk.py | 51 ++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index faea6576f..0ff058619 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from .nexx import NexxIE -from ..utils import int_or_none +from ..utils import ( + int_or_none, + try_get, +) class FunkBaseIE(InfoExtractor): @@ -77,6 +80,20 @@ class FunkChannelIE(FunkBaseIE): 'params': { 'skip_download': True, }, + }, { + # only available via byIdList API + 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', + 'info_dict': { + 'id': '205067', + 'ext': 'mp4', + 'title': 'Martin Sonneborn erklärt die EU', + 'description': 'md5:050f74626e4ed87edf4626d2024210c0', + 'timestamp': 1494424042, + 'upload_date': '20170510', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', 'only_matching': True, @@ -87,16 +104,28 @@ class FunkChannelIE(FunkBaseIE): channel_id = mobj.group('id') alias = mobj.group('alias') - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id, - headers={ - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc', - 'Referer': url, - }, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] + headers = { + 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc', + 'Referer': url, + } - video = next(r for r in results if r.get('alias') == alias) + video = None + + by_id_list = self._download_json( + 'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id, + headers=headers, query={ + 'ids': alias, + }, fatal=False) + if by_id_list: + video = try_get(by_id_list, lambda x: x['result'][0], dict) + + if not video: + results = self._download_json( + 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id, + headers=headers, query={ + 'channelId': channel_id, + 'size': 100, + })['result'] + video = next(r for r in results if r.get('alias') == alias) return self._make_url_result(video) From 0fe7783eced5c62dbd95780c2150fd1080bd3927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 01:59:15 +0700 Subject: [PATCH 011/114] [extractor/common] Add _download_json_handle --- youtube_dl/extractor/common.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 59b9d3739..e0c3c8eb0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -682,18 +682,30 @@ class InfoExtractor(object): else: self.report_warning(errmsg + str(ve)) - def _download_json(self, url_or_request, video_id, - note='Downloading JSON metadata', - errnote='Unable to download JSON metadata', - transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}): - json_string = self._download_webpage( + def _download_json_handle( + self, url_or_request, video_id, note='Downloading JSON metadata', + errnote='Unable to download JSON metadata', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + """Return a tuple (JSON object, URL handle)""" + res = self._download_webpage_handle( url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) - if (not fatal) and json_string is False: - return None + if res is False: + return res + json_string, urlh = res return self._parse_json( - json_string, video_id, transform_source=transform_source, fatal=fatal) + json_string, video_id, transform_source=transform_source, + fatal=fatal), urlh + + def _download_json( + self, url_or_request, video_id, note='Downloading JSON metadata', + errnote='Unable to download JSON metadata', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + res = self._download_json_handle( + url_or_request, video_id, note=note, errnote=errnote, + transform_source=transform_source, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query) + return res if res is False else res[0] def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): if transform_source: From 6cc622327ff8289f94894f3695ed31014c61cf8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:47:17 +0700 Subject: [PATCH 012/114] [utils] Introduce merge_dicts --- test/test_utils.py | 12 ++++++++++++ youtube_dl/extractor/generic.py | 16 +--------------- youtube_dl/utils.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 253a7fe17..14503ab53 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -42,6 +42,7 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + merge_dicts, mimetype2ext, month_by_name, multipart_encode, @@ -669,6 +670,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(dict_get(d, ('b', 'c', key, )), None) self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + def test_merge_dicts(self): + self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) + self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) + self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + def test_encode_compat_str(self): self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index af1322e00..d48914495 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -23,6 +23,7 @@ from ..utils import ( is_html, js_to_json, KNOWN_EXTENSIONS, + merge_dicts, mimetype2ext, orderedSet, sanitized_Request, @@ -3002,21 +3003,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( sharevideos_urls, video_id, video_title) - def merge_dicts(dict1, dict2): - merged = {} - for k, v in dict1.items(): - if v is not None: - merged[k] = v - for k, v in dict2.items(): - if v is None: - continue - if (k not in merged or - (isinstance(v, compat_str) and v and - isinstance(merged[k], compat_str) and - not merged[k])): - merged[k] = v - return merged - # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 574284e94..b460393bf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2225,6 +2225,20 @@ def try_get(src, getter, expected_type=None): return v +def merge_dicts(*dicts): + merged = {} + for a_dict in dicts: + for k, v in a_dict.items(): + if v is None: + continue + if (k not in merged or + (isinstance(v, compat_str) and v and + isinstance(merged[k], compat_str) and + not merged[k])): + merged[k] = v + return merged + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) From e7e4a6e0f9166cee82c165ca69a6a3c94ddc5f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:48:03 +0700 Subject: [PATCH 013/114] [extractor/common] Extract interaction statistic --- youtube_dl/extractor/common.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e0c3c8eb0..a9939b0fd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1020,6 +1020,40 @@ class InfoExtractor(object): if isinstance(json_ld, dict): json_ld = [json_ld] + INTERACTION_TYPE_MAP = { + 'CommentAction': 'comment', + 'AgreeAction': 'like', + 'DisagreeAction': 'dislike', + 'LikeAction': 'like', + 'DislikeAction': 'dislike', + 'ListenAction': 'view', + 'WatchAction': 'view', + 'ViewAction': 'view', + } + + def extract_interaction_statistic(e): + interaction_statistic = e.get('interactionStatistic') + if not isinstance(interaction_statistic, list): + return + for is_e in interaction_statistic: + if not isinstance(is_e, dict): + continue + if is_e.get('@type') != 'InteractionCounter': + continue + interaction_type = is_e.get('interactionType') + if not isinstance(interaction_type, compat_str): + continue + interaction_count = int_or_none(is_e.get('userInteractionCount')) + if interaction_count is None: + continue + count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) + if not count_kind: + continue + count_key = '%s_count' % count_kind + if info.get(count_key) is not None: + continue + info[count_key] = interaction_count + def extract_video_object(e): assert e['@type'] == 'VideoObject' info.update({ @@ -1035,6 +1069,7 @@ class InfoExtractor(object): 'height': int_or_none(e.get('height')), 'view_count': int_or_none(e.get('interactionCount')), }) + extract_interaction_statistic(e) for e in json_ld: if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): From ae1c585cee3eb183cddf7c30a09b75d887307dee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:48:20 +0700 Subject: [PATCH 014/114] [vimeo] Extract JSON LD (closes #16295) --- youtube_dl/extractor/vimeo.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 08257147e..a026526b2 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,6 +16,7 @@ from ..utils import ( ExtractorError, InAdvancePagedList, int_or_none, + merge_dicts, NO_DEFAULT, RegexNotFoundError, sanitized_Request, @@ -639,16 +640,18 @@ class VimeoIE(VimeoBaseInfoExtractor): 'preference': 1, }) - info_dict = self._parse_config(config, video_id) - formats.extend(info_dict['formats']) + info_dict_config = self._parse_config(config, video_id) + formats.extend(info_dict_config['formats']) self._vimeo_sort_formats(formats) + json_ld = self._search_json_ld(webpage, video_id, default={}) + if not cc_license: cc_license = self._search_regex( r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') - info_dict.update({ + info_dict = { 'id': video_id, 'formats': formats, 'timestamp': unified_timestamp(timestamp), @@ -658,7 +661,9 @@ class VimeoIE(VimeoBaseInfoExtractor): 'like_count': like_count, 'comment_count': comment_count, 'license': cc_license, - }) + } + + info_dict = merge_dicts(info_dict, info_dict_config, json_ld) return info_dict From 7dd6ab4a47b08beafe45befa29c44df2db00547e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 04:51:39 +0700 Subject: [PATCH 015/114] [imdb] Extract all formats (closes #16249) --- youtube_dl/extractor/imdb.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3ff672a89..425421968 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -3,7 +3,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + determine_ext, mimetype2ext, qualities, remove_end, @@ -73,19 +75,25 @@ class ImdbIE(InfoExtractor): video_info_list = format_info.get('videoInfoList') if not video_info_list or not isinstance(video_info_list, list): continue - video_info = video_info_list[0] - if not video_info or not isinstance(video_info, dict): - continue - video_url = video_info.get('videoUrl') - if not video_url: - continue - format_id = format_info.get('ffname') - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'ext': mimetype2ext(video_info.get('videoMimeType')), - 'quality': quality(format_id), - }) + for video_info in video_info_list: + if not video_info or not isinstance(video_info, dict): + continue + video_url = video_info.get('videoUrl') + if not video_url or not isinstance(video_url, compat_str): + continue + if (video_info.get('videoMimeType') == 'application/x-mpegURL' or + determine_ext(video_url) == 'm3u8'): + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + format_id = format_info.get('ffname') + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'ext': mimetype2ext(video_info.get('videoMimeType')), + 'quality': quality(format_id), + }) self._sort_formats(formats) return { From 500a86a52ee46a3a1acc864b602b74d141afdc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Apr 2018 00:33:31 +0700 Subject: [PATCH 016/114] [downloader/fragment] Restart download if .ytdl file is corrupt (closes #16312) --- youtube_dl/downloader/fragment.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 927c7e491..917f6dc01 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -74,9 +74,14 @@ class FragmentFD(FileDownloader): return not ctx['live'] and not ctx['tmpfilename'] == '-' def _read_ytdl_file(self, ctx): + assert 'ytdl_corrupt' not in ctx stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') - ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] - stream.close() + try: + ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + except Exception: + ctx['ytdl_corrupt'] = True + finally: + stream.close() def _write_ytdl_file(self, ctx): frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') @@ -158,11 +163,17 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) - if ctx['fragment_index'] > 0 and resume_len == 0: + is_corrupt = ctx.get('ytdl_corrupt') is True + is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 + if is_corrupt or is_inconsistent: + message = ( + '.ytdl file is corrupt' if is_corrupt else + 'Inconsistent state of incomplete fragment download') self.report_warning( - 'Inconsistent state of incomplete fragment download. ' - 'Restarting from the beginning...') + '%s. Restarting from the beginning...' % message) ctx['fragment_index'] = resume_len = 0 + if 'ytdl_corrupt' in ctx: + del ctx['ytdl_corrupt'] self._write_ytdl_file(ctx) else: self._write_ytdl_file(ctx) From 106c8c3edbc5b7e95cfba79ddc6252fad0adb859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Apr 2018 19:04:40 +0700 Subject: [PATCH 017/114] [nrktv] Update API host (closes #16324) --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 18ead9426..3b4f51f61 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE): (?:/\d{2}-\d{2}-\d{4})? (?:\#del=(?P\d+))? ''' % _EPISODE_RE - _API_HOST = 'psapi-ne.nrk.no' + _API_HOST = 'psapi-we.nrk.no' _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', From 12b0d4e0e1df6d6a8b9ce10b9a69013497adc2b0 Mon Sep 17 00:00:00 2001 From: Meneth32 Date: Sun, 29 Apr 2018 16:59:40 +0200 Subject: [PATCH 018/114] [redditr] Add support for old.reddit.com URLs --- youtube_dl/extractor/reddit.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py index 53b1c967e..8372925be 100644 --- a/youtube_dl/extractor/reddit.py +++ b/youtube_dl/extractor/reddit.py @@ -47,7 +47,7 @@ class RedditIE(InfoExtractor): class RedditRIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:(?:www|old)\.)?reddit\.com/r/[^/]+/comments/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -74,6 +74,10 @@ class RedditRIE(InfoExtractor): # imgur 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', 'only_matching': True, + }, { + # imgur @ old reddit + 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, }, { # streamable 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', From 01aec8488084e62aa188b5167e57d01ef66cd256 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sun, 29 Apr 2018 17:14:37 +0200 Subject: [PATCH 019/114] [generic] Prefer enclosures over links in RSS feeds --- youtube_dl/extractor/generic.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d48914495..252f97c26 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -191,6 +191,16 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # RSS feed with enclosures and unsupported link URLs + { + 'url': 'http://www.hellointernet.fm/podcast?format=rss', + 'info_dict': { + 'id': 'http://www.hellointernet.fm/podcast?format=rss', + 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', + 'title': 'Hello Internet', + }, + 'playlist_mincount': 100, + }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', @@ -2026,13 +2036,15 @@ class GenericIE(InfoExtractor): entries = [] for it in doc.findall('./channel/item'): - next_url = xpath_text(it, 'link', fatal=False) + next_url = None + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + if not next_url: - enclosure_nodes = it.findall('./enclosure') - for e in enclosure_nodes: - next_url = e.attrib.get('url') - if next_url: - break + next_url = xpath_text(it, 'link', fatal=False) if not next_url: continue From 30226342ab346263b684170c4ce7d5266fec212e Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Sun, 29 Apr 2018 11:23:23 +0200 Subject: [PATCH 020/114] [youtube] Correctly disable polymer on all requests Rather than just the one that use the _download_webpage helper. The need for this was made apparent by 0fe7783e, which refactored _download_json in a way that completely avoids the use of _download_webpage, thus breaking youtube. Fixes #16323 --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e7bd1f18f..04aeb91af 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -246,9 +246,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return True - def _download_webpage(self, *args, **kwargs): + def _download_webpage_handle(self, *args, **kwargs): kwargs.setdefault('query', {})['disable_polymer'] = 'true' - return super(YoutubeBaseInfoExtractor, self)._download_webpage( + return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) def _real_initialize(self): From e5eadfa82f10bda43294d1da85024eec29c7973f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Apr 2018 22:49:47 +0700 Subject: [PATCH 021/114] [udemy,xiami,yandexmusic] Override _download_webpage_handle instead of _download_webpage --- youtube_dl/extractor/udemy.py | 4 ++-- youtube_dl/extractor/xiami.py | 4 ++-- youtube_dl/extractor/yandexmusic.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 6d6c0a98f..439ed2a89 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -115,9 +115,9 @@ class UdemyIE(InfoExtractor): error_str += ' - %s' % error_data.get('formErrors') raise ExtractorError(error_str, expected=True) - def _download_webpage(self, *args, **kwargs): + def _download_webpage_handle(self, *args, **kwargs): kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' - return super(UdemyIE, self)._download_webpage( + return super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) def _download_json(self, url_or_request, *args, **kwargs): diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 7f871c8ec..8333fb534 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -9,8 +9,8 @@ from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' - def _download_webpage(self, *args, **kwargs): - webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs) + def _download_webpage_handle(self, *args, **kwargs): + webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) if '>Xiami is currently not available in your country.<' in webpage: self.raise_geo_restricted('Xiami is currently not available in your country') return webpage diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index eb1062142..e85eca073 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -34,8 +34,8 @@ class YandexMusicBaseIE(InfoExtractor): 'youtube-dl with --cookies', expected=True) - def _download_webpage(self, *args, **kwargs): - webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) + def _download_webpage_handle(self, *args, **kwargs): + webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: self._raise_captcha() return webpage From 796bf9de45d6f01bf2d34ae22e1eacdc1a649fab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Apr 2018 22:56:07 +0700 Subject: [PATCH 022/114] [yandexmusic] Convert release_year to int --- youtube_dl/extractor/yandexmusic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index e85eca073..009203851 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -57,14 +57,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, 'track': 'Gypsy Eyes 1', 'album': 'Gypsy Soul', 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio', - 'release_year': '2009', + 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', + 'release_year': 2009, }, 'skip': 'Travis CI servers blocked by YandexMusic', } @@ -120,7 +120,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): track_info.update({ 'album': album.get('title'), 'album_artist': extract_artist(album.get('artists')), - 'release_year': compat_str(year) if year else None, + 'release_year': int_or_none(year), }) track_artist = extract_artist(track.get('artists')) From 4a733545867a014eb786348f8fb9e6ae95850742 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sun, 5 Nov 2017 18:07:35 +0100 Subject: [PATCH 023/114] [zattoo] Add extractor (closes #14668) --- youtube_dl/extractor/extractors.py | 6 + youtube_dl/extractor/zattoo.py | 234 +++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 youtube_dl/extractor/zattoo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6fb65e4fe..9fe3f649d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1418,5 +1418,11 @@ from .youtube import ( ) from .zapiks import ZapiksIE from .zaq1 import Zaq1IE +from .zattoo import ( + QuicklineIE, + QuicklineLiveIE, + ZattooIE, + ZattooLiveIE, +) from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py new file mode 100644 index 000000000..928f22566 --- /dev/null +++ b/youtube_dl/extractor/zattoo.py @@ -0,0 +1,234 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from uuid import uuid4 +import re + +from .common import InfoExtractor +from ..utils import ( + compat_str, + ExtractorError, + sanitized_Request, + urlencode_postdata, +) + + +class ZattooBaseIE(InfoExtractor): + + _NETRC_MACHINE = 'zattoo' + _HOST_URL = 'https://zattoo.com' + + _power_guide_hash = None + + def _login(self, uuid, session_id): + (username, password) = self._get_login_info() + if not username or not password: + raise ExtractorError( + 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE, + expected=True) + login_form = { + 'login': username, + 'password': password, + 'remember': True, + } + request = sanitized_Request( + '%s/zapi/v2/account/login' % self._HOST_URL, + urlencode_postdata(login_form)) + request.add_header( + 'Referer', '%s/login' % self._HOST_URL) + request.add_header( + 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') + request.add_header( + 'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id)) + response = self._request_webpage( + request, None, 'Logging in') + data = self._parse_json(response.read(), None) + return data['session']['power_guide_hash'] + + def _get_app_token_and_version(self): + host_webpage = self._download_webpage( + self._HOST_URL, None, 'Downloading %s' % self._HOST_URL) + app_token = self._html_search_regex( + r'[^/]+)/(?P[0-9]+)' + + def _real_extract(self, url): + channel_name, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_video(channel_name, video_id) + + +class QuicklineLiveIE(QuicklineBaseIE): + _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)$' + + def _real_extract(self, url): + channel_name = video_id = self._match_id(url) + return self._extract_video(channel_name, video_id, is_live=True) + + +class ZattooIE(ZattooBaseIE): + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + + # Since regular videos are only available for 7 days and recorded videos + # are only available for a specific user, we cannot have detailed tests. + _TESTS = [{ + 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', + 'only_matching': True, + }, { + 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups() + return self._extract_video(channel_name, video_id, record_id) + + +class ZattooLiveIE(ZattooBaseIE): + _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+)$' + + _TEST = { + 'url': 'https://zattoo.com/watch/srf1', + 'only_matching': True, + } + + def _real_extract(self, url): + channel_name = video_id = self._match_id(url) + return self._extract_video(channel_name, video_id, is_live=True) From 67ca1a8ef7ea6094e1e34518b93cdb5ba59f31b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 May 2018 01:48:21 +0700 Subject: [PATCH 024/114] [zattoo] Improve and simplify (closes #14676) --- youtube_dl/extractor/zattoo.py | 238 +++++++++++++++++++-------------- 1 file changed, 137 insertions(+), 101 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 928f22566..773073d85 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -1,84 +1,82 @@ # coding: utf-8 from __future__ import unicode_literals -from uuid import uuid4 import re +from uuid import uuid4 from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_HTTPError, compat_str, +) +from ..utils import ( ExtractorError, - sanitized_Request, + int_or_none, + try_get, urlencode_postdata, ) class ZattooBaseIE(InfoExtractor): - _NETRC_MACHINE = 'zattoo' _HOST_URL = 'https://zattoo.com' _power_guide_hash = None - def _login(self, uuid, session_id): + def _login(self): (username, password) = self._get_login_info() if not username or not password: - raise ExtractorError( - 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE, - expected=True) - login_form = { - 'login': username, - 'password': password, - 'remember': True, - } - request = sanitized_Request( - '%s/zapi/v2/account/login' % self._HOST_URL, - urlencode_postdata(login_form)) - request.add_header( - 'Referer', '%s/login' % self._HOST_URL) - request.add_header( - 'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') - request.add_header( - 'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id)) - response = self._request_webpage( - request, None, 'Logging in') - data = self._parse_json(response.read(), None) - return data['session']['power_guide_hash'] + self.raise_login_required( + 'A valid %s account is needed to access this media.' + % self._NETRC_MACHINE) - def _get_app_token_and_version(self): - host_webpage = self._download_webpage( - self._HOST_URL, None, 'Downloading %s' % self._HOST_URL) + try: + data = self._download_json( + '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in', + data=urlencode_postdata({ + 'login': username, + 'password': password, + 'remember': 'true', + }), headers={ + 'Referer': '%s/login' % self._HOST_URL, + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + raise ExtractorError( + 'Unable to login: incorrect username and/or password', + expected=True) + raise + + self._power_guide_hash = data['session']['power_guide_hash'] + + def _real_initialize(self): + webpage = self._download_webpage( + self._HOST_URL, None, 'Downloading app token') app_token = self._html_search_regex( - r'(?:(?!\1).)+?)\1', + webpage, 'app token', group='token') app_version = self._html_search_regex( - r'