From f06d68eb70422137114a73c8a11e0c82eae15706 Mon Sep 17 00:00:00 2001 From: tsukumijima Date: Fri, 24 Jan 2020 15:11:02 +0900 Subject: [PATCH] [niconico] Support encrypted official videos & Fix to get higher quality thumbnail and original title --- youtube_dl/downloader/common.py | 38 +++++++++++++++++- youtube_dl/extractor/niconico.py | 67 +++++++++++++++++++++++++++----- 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 1cdba89cd..b3206cdc2 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,8 +5,12 @@ import re import sys import time import random +import threading -from ..compat import compat_os_name +from ..compat import ( + compat_os_name, + compat_urllib_request, +) from ..utils import ( decodeArgument, encodeFilename, @@ -363,7 +367,37 @@ class FileDownloader(object): else '%.2f' % sleep_interval)) time.sleep(sleep_interval) - return self.real_download(filename, info_dict) + timer = [None] + heartbeat_lock = None + download_complete = False + if 'heartbeat_url'in info_dict: + heartbeat_lock = threading.Lock() + + heartbeat_url = info_dict['heartbeat_url'] + heartbeat_data = info_dict['heartbeat_data'] + heartbeat_interval = info_dict.get('heartbeat_interval', 30) + self.to_screen('[download] Heartbeat with %s second interval...' % heartbeat_interval) + + def heartbeat(): + try: + compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data) + except Exception: + self.to_screen("[download] Heartbeat failed") + + with heartbeat_lock: + if not download_complete: + timer[0] = threading.Timer(heartbeat_interval, heartbeat) + timer[0].start() + + heartbeat() + + try: + return self.real_download(filename, info_dict) + finally: + if heartbeat_lock: + with heartbeat_lock: + timer[0].cancel() + download_complete = True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index eb07ca776..44c3718d3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -195,6 +195,46 @@ class NiconicoIE(InfoExtractor): session_api_data = api_data['video']['dmcInfo']['session_api'] session_api_endpoint = session_api_data['urls'][0] + # ping + self._download_json( + 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id, + query={'t': api_data['video']['dmcInfo']['tracking_id']}, + headers={ + 'Origin': 'https://www.nicovideo.jp', + 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, + 'X-Frontend-Id': '6', + 'X-Frontend-Version': '0' + }) + + # hls (encryption) + if 'encryption' in api_data['video']['dmcInfo']: + session_api_http_parameters = { + 'parameters': { + 'hls_parameters': { + 'encryption': { + 'hls_encryption_v1': { + 'encrypted_key': api_data['video']['dmcInfo']['encryption']['hls_encryption_v1']['encrypted_key'], + 'key_uri': api_data['video']['dmcInfo']['encryption']['hls_encryption_v1']['key_uri'] + } + }, + 'transfer_preset': '', + 'use_ssl': yesno(session_api_endpoint['is_ssl']), + 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), + 'segment_duration': 6000 + } + } + } + # http + else: + session_api_http_parameters = { + 'parameters': { + 'http_output_download_parameters': { + 'use_ssl': yesno(session_api_endpoint['is_ssl']), + 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), + } + } + } + format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) session_response = self._download_json( @@ -233,14 +273,7 @@ class NiconicoIE(InfoExtractor): 'protocol': { 'name': 'http', 'parameters': { - 'http_parameters': { - 'parameters': { - 'http_output_download_parameters': { - 'use_ssl': yesno(session_api_endpoint['is_ssl']), - 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), - } - } - } + 'http_parameters': session_api_http_parameters } }, 'recipe_id': session_api_data['recipe_id'], @@ -254,6 +287,12 @@ class NiconicoIE(InfoExtractor): } }).encode()) + # get heartbeat info + heartbeat_url = session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT' + heartbeat_data = json.dumps(session_response['data']).encode() + # interval, convert milliseconds to seconds, then halve to make a buffer. + heartbeat_interval = session_api_data['heartbeat_lifetime'] / 2000 + resolution = video_quality.get('resolution', {}) return { @@ -264,6 +303,13 @@ class NiconicoIE(InfoExtractor): 'vbr': float_or_none(video_quality.get('bitrate'), 1000), 'height': resolution.get('height'), 'width': resolution.get('width'), + 'heartbeat_url': heartbeat_url, + 'heartbeat_data': heartbeat_data, + 'heartbeat_interval': heartbeat_interval, + 'http_headers': { + 'Origin': 'https://www.nicovideo.jp', + 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, + } } def _real_extract(self, url): @@ -354,7 +400,7 @@ class NiconicoIE(InfoExtractor): return dict_get(api_data['video'], items) # Start extracting information - title = get_video_info('title') + title = get_video_info('originalTitle') if not title: title = self._og_search_title(webpage, default=None) if not title: @@ -369,7 +415,8 @@ class NiconicoIE(InfoExtractor): video_detail = watch_api_data.get('videoDetail', {}) thumbnail = ( - get_video_info(['thumbnail_url', 'thumbnailURL']) + self._html_search_regex(r'', webpage, 'thumbnail data', default=None) + or get_video_info(['thumbnail_url', 'largeThumbnailURL', 'thumbnailURL']) or self._html_search_meta('image', webpage, 'thumbnail', default=None) or video_detail.get('thumbnail'))