[tver] Added support for videos distributed by Fuji TV (FOD, Fuji TV On Demand), which are hosted on Fuji TV's own distribution system instead of Brightcove

2024-11-22 08:34:32 +01:00 · 2020-09-30 02:43:27 +09:00 · 2020-09-30 02:43:27 +09:00 · bcae9d7a02
commit bcae9d7a02
parent bf6f1048b2
1 changed files with 109 additions and 38 deletions
--- a/youtube_dl/extractor/tver.py
+++ b/youtube_dl/extractor/tver.py
@ -12,23 +12,44 @@ from ..utils import (

 class TVerIE(InfoExtractor):

-    _TEST = {
-        'url': 'https://tver.jp/feature/f0057485',  # In addition to 'feature', there are also categories such as 'corner' and 'episode'.
-        'md5': '4ae1bc00e6d55af8f7e2b2c17029f1a3',  # MD5 hash of a short video downloaded by running youtube-dl with the --test option
-        'info_dict': {
-            'id': 'f0057485',  # TVer ID
-            'display_id': 'ref:hanzawa_naoki---s2----323-001',  # Brightcove ID
-            'ext': 'mp4',
-            'title': '半沢直樹(新シリーズ)　第1話 子会社VS銀行!飛ばされた半沢の新たな下剋上が始まる',
-            'description': 'md5:92ce839312ee1e9b162de73fa08b6374',
-            'thumbnail': r're:https?://.*\.jpg$',
-            'duration': 4100.032,
-            'timestamp': 1600308623,
-            'upload_date': '20200917',
-            'uploader_id': '4031511847001',
+    _TESTS = [
+        {
+            # Delivery from Brightcove
+            'url': 'https://tver.jp/feature/f0057485',  # In addition to 'feature', there are also categories such as 'corner' and 'episode'.
+            'md5': '4ae1bc00e6d55af8f7e2b2c17029f1a3',  # MD5 hash of a short video downloaded by running youtube-dl with the --test option
+            'info_dict': {
+                'id': 'f0057485',  # TVer ID
+                'display_id': 'ref:hanzawa_naoki---s2----323-001',  # Brightcove ID
+                'ext': 'mp4',
+                'title': '半沢直樹(新シリーズ)　第1話 子会社VS銀行!飛ばされた半沢の新たな下剋上が始まる',
+                'description': 'md5:92ce839312ee1e9b162de73fa08b6374',
+                'thumbnail': r're:https?://.*\.jpg$',
+                'duration': 4100.032,
+                'timestamp': 1600308623,
+                'upload_date': '20200917',
+                'uploader_id': '4031511847001',
+                'creator': 'tbs',  # Means TBS TV
+                'uploader': 'TBS FREE',  # Content provider
+            },
+            'skip': 'Running from test_download.py doesn\'t seem to be able to handle encrypted HLS videos',
        },
-        'skip': 'Running from test_download.py doesn\'t seem to be able to handle encrypted HLS videos',
-    }
+        {
+            # Delivery from FOD (Fuji TV On Demand)
+            'url': 'https://tver.jp/corner/f0057932',  # 'corner' category URL; 'episode' and 'feature' are the other categories matched by _VALID_URL.
+            'md5': '6d1970594e532f4b1d6403b5bf9d0d67',  # MD5 hash of a short video downloaded by running youtube-dl with the --test option
+            'info_dict': {
+                'id': 'f0057932',  # TVer ID
+                'display_id': '5d40810015',  # FOD ID
+                'ext': 'mp4',
+                'title': 'ちびまる子ちゃん　#1258「秋のお楽しみメニュー～まる子の昔ばなし～ 『まる子の涼しい大作戦』の巻／『まる子のおむすびころりん』の巻」',
+                'description': 'md5:328c6ef38bed76588a1f6eb5d69c4a7c',
+                'thumbnail': r're:https?://.*\.jpg$',
+                'creator': 'cx',  # Means Fuji TV
+                'uploader': 'FOD見逃し無料',  # Content provider
+            },
+            'skip': 'Running from test_download.py doesn\'t seem to be able to handle encrypted HLS videos',
+        },
+    ]

    IE_NAME = 'TVer'
    IE_DESC = 'TVer'
@ -36,9 +57,6 @@ class TVerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(corner|episode|feature)/(?P<id>f?[0-9]+)'
    _GEO_COUNTRIES = ['JP']  # TVer service is limited to Japan only

-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-
-    # TODO: FOD対応
    def _real_extract(self, url):

        video_id = self._match_id(url)
@ -49,31 +67,84 @@ class TVerIE(InfoExtractor):
        tver_info_csv = tver_info_csv.replace('\t', '').replace('\n', '').replace('\'', '')  # remove \t and \n and '
        tver_info = tver_info_csv.split(',')

-        # extract brightcove information
-        brightcove_account_id = tver_info[3]
-        brightcove_video_id = 'ref:' + tver_info[4]
-        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (brightcove_account_id, brightcove_video_id)
-        brightcove_info = self._extract_brightcove_info(brightcove_url, 'https://tver.jp/')
+        # extract tver title
+        title = tver_info[5] + '　' + tver_info[6].lstrip()  # title + subtitle

        # extract tver description
        description = \
            self._html_search_meta(['og:description', 'twitter:description'], webpage, 'description', default=None) or \
            self._html_search_regex(r'<div[^>]+class="description"[^>]*>(?P<description>.*?)</div>', webpage, 'description', default=None, flags=re.DOTALL)

-        # Note: Delegate extraction to BrightcoveNewIE by specifying url_transparent,
-        # while also making TverIE's own acquired entities such as description available.
-        info_dict = {
-            '_type': 'url_transparent',
-            'url': brightcove_url,
-            'ie_key': BrightcoveNewIE.ie_key(),
-            'id': video_id,  # Tver ID
-            'display_id': brightcove_video_id,  # Brightcove ID
-            'title': brightcove_info.get('name'),
-            'description': description,
-            'thumbnail': re.sub(r'/[0-9]+x[0-9]+/', r'/1920x1080/', brightcove_info.get('poster')),  # select large thumbnail
-            'creator': tver_info[7],  # Broadcaster name  e.g. 'tbs', 'ntv'
-            'uploader': tver_info[8],  # Delivery platform name  e.g. 'TBS FREE', '日テレ無料'
-        }
+        # Note: Of the videos on TVer, only those distributed by Fuji TV (FOD, Fuji TV On Demand)
+        # use Fuji TV's own distribution system instead of Brightcove.
+        if tver_info[7] == 'cx':
+
+            # extract fod information
+            fod_video_id = tver_info[3]
+            fod_url = 'https://i.fod.fujitv.co.jp/abr/pc_html5/%s.m3u8' % fod_video_id
+            fod_thumbnail = 'https://i.fod.fujitv.co.jp/pc/image/wbtn/wbtn_%s.jpg' % fod_video_id
+
+            # extract fod formats
+            fod_formats = self._extract_m3u8_formats(fod_url, fod_video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+
+            # Note: Every 'RESOLUTION' value in the playlist says 360p, which is incorrect,
+            # so width/height are overwritten below with values measured from actual downloads.
+            for index, fod_fotmat in enumerate(fod_formats):
+                # 720p, 2000kbps
+                if fod_fotmat['format_id'] == 'hls-2000':
+                    fod_formats[index]['width'] = 1280
+                    fod_formats[index]['height'] = 720
+                # 720p, 1200kbps
+                elif fod_fotmat['format_id'] == 'hls-1200':
+                    fod_formats[index]['width'] = 1280
+                    fod_formats[index]['height'] = 720
+                # 360p, 800kbps
+                elif fod_fotmat['format_id'] == 'hls-800':
+                    fod_formats[index]['width'] = 640
+                    fod_formats[index]['height'] = 360
+                # 180p, 300kbps
+                elif fod_fotmat['format_id'] == 'hls-300':
+                    fod_formats[index]['width'] = 320
+                    fod_formats[index]['height'] = 180
+
+            # reverse the format order
+            fod_formats.reverse()
+
+            info_dict = {
+                'id': video_id,  # Tver ID
+                'display_id': fod_video_id,  # FOD ID
+                'formats': fod_formats,
+                'title': title,
+                'description': description,
+                'thumbnail': fod_thumbnail,
+                'creator': tver_info[7],  # Broadcaster name  e.g. 'cx'
+                'uploader': tver_info[8],  # Delivery platform name  e.g. 'FOD見逃し無料'
+                'tags': [tver_info[5]],
+                'is_live': False,
+            }
+
+        else:
+
+            # extract brightcove information
+            brightcove_account_id = tver_info[3]
+            brightcove_video_id = 'ref:' + tver_info[4]
+            brightcove_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (brightcove_account_id, brightcove_video_id)
+            brightcove_info = self._extract_brightcove_info(brightcove_url, 'https://tver.jp/')
+
+            # Note: Delegate extraction to BrightcoveNewIE by specifying url_transparent,
+            # while still passing along the fields TVerIE extracted itself (e.g. description).
+            info_dict = {
+                '_type': 'url_transparent',
+                'url': brightcove_url,
+                'ie_key': BrightcoveNewIE.ie_key(),
+                'id': video_id,  # Tver ID
+                'display_id': brightcove_video_id,  # Brightcove ID
+                'title': title or brightcove_info.get('name'),
+                'description': description,
+                'thumbnail': re.sub(r'/[0-9]+x[0-9]+/', r'/1920x1080/', brightcove_info.get('poster')),  # select large thumbnail
+                'creator': tver_info[7],  # Broadcaster name  e.g. 'tbs', 'ntv'
+                'uploader': tver_info[8],  # Delivery platform name  e.g. 'TBS FREE', '日テレ無料'
+            }

        return info_dict