1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-30 04:08:01 +01:00

[3plus] Handle real video extraction in 3qsdn information extractor

- Add support for 3qsdn playlists.
- Improve title extraction for 3qsdn media.
- Add description extraction for 3qsdn media.
- Remove ThreePlusBaseIE, which is no longer needed since everything
can be done directly by the 3qsdn information extractor.
This commit is contained in:
Alex Seiler 2017-03-03 05:46:33 +01:00
parent 7358fd6b5e
commit f7fb256946
2 changed files with 42 additions and 43 deletions

View File

@ -5,55 +5,26 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from ..utils import get_element_by_class from ..utils import (
get_element_by_class,
smuggle_url,
)
class ThreePlusBaseIE(InfoExtractor): class ThreePlusIE(InfoExtractor):
_HOST_URL = 'http://playout.3qsdn.com/'
def _get_title_and_description(self, video_id):
webpage = self._download_webpage(
self._HOST_URL + video_id, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
return title, description
def _get_real_video_id(self, video_id):
video_info = self._download_webpage(self._HOST_URL + video_id + '?js=true', video_id)
video_id = self._search_regex(
r'sdnPlayoutId\s*:\s*\'(.+?)\'', video_info, 'Real video id', default=video_id)
video_id = video_id.replace('\\x2D', '-')
return video_id
def _extract_from_id(self, video_id):
video_id = self._get_real_video_id(video_id)
title, description = self._get_title_and_description(video_id)
return {
'_type': 'url_transparent',
'ie_key': ThreeQSDNIE.ie_key(),
'url': self._HOST_URL + video_id,
'id': video_id,
'title': title,
'description': description,
}
class ThreePlusIE(ThreePlusBaseIE):
IE_NAME = '3 Plus' IE_NAME = '3 Plus'
_VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/(?!videos)(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/(?!videos)(?P<id>.+)'
_TESTS = [{ _TESTS = [{
# Real video ID known in advance # Real video ID known in advance
'url': 'http://www.3plus.tv/episode/mama-ich-bin-schwanger/teenager-werden-muetter-folge-3', 'url': 'http://www.3plus.tv/episode/mama-ich-bin-schwanger/teenager-werden-muetter-folge-3',
'md5': '424d9bd2b10e7d4149299bef74e5ddd2',
'info_dict': { 'info_dict': {
'id': 'de1b7745-11d6-11e6-b427-0cc47a188158', 'id': 'de1b7745-11d6-11e6-b427-0cc47a188158',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MAMA ICH BIN SCHWANGER ST01 - Episode 03', 'title': 'MAMA ICH BIN SCHWANGER ST01 - Episode 03',
'description': 'md5:2b93142fd82f4b5460f97b13fee40eb8', 'description': 'md5:2b93142fd82f4b5460f97b13fee40eb8',
}, },
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest', 'Failed to parse JSON'], 'expected_warnings': ['Unable to download f4m manifest', 'Failed to parse JSON'],
}, { }, {
# Real video ID not known in advance # Real video ID not known in advance
@ -76,10 +47,12 @@ class ThreePlusIE(ThreePlusBaseIE):
video_id = self._search_regex( video_id = self._search_regex(
r'var\s+sdnPlayoutId\s*=\s*"([0-9a-f\-]{36})"', webpage, 'video id') r'var\s+sdnPlayoutId\s*=\s*"([0-9a-f\-]{36})"', webpage, 'video id')
return self._extract_from_id(video_id) return self.url_result(
smuggle_url('3qsdn:%s' % video_id, {'first_video_only': True}),
ThreeQSDNIE.ie_key())
class ThreePlusPlaylistIE(ThreePlusBaseIE): class ThreePlusPlaylistIE(InfoExtractor):
IE_NAME = '3 Plus Playlists' IE_NAME = '3 Plus Playlists'
_VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/videos/(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/videos/(?P<id>.+)'
@ -97,6 +70,8 @@ class ThreePlusPlaylistIE(ThreePlusBaseIE):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
title = get_element_by_class('pane-title', webpage) title = get_element_by_class('pane-title', webpage)
entries = [self._extract_from_id(m.group('id')) for m in re.finditer( entries = [self.url_result(
smuggle_url('3qsdn:%s' % m.group('id'), {'first_video_only': True})) for m in re.finditer(
r'<div[^>]+class\s*=\s*"field-content\s*"\s*>(?P<id>[0-9a-f\-]{36})</div>', webpage)] r'<div[^>]+class\s*=\s*"field-content\s*"\s*>(?P<id>[0-9a-f\-]{36})</div>', webpage)]
return self.playlist_result(entries, playlist_id, title) return self.playlist_result(entries, playlist_id, title)

View File

@ -7,13 +7,15 @@ from ..utils import (
determine_ext, determine_ext,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
unsmuggle_url,
) )
class ThreeQSDNIE(InfoExtractor): class ThreeQSDNIE(InfoExtractor):
IE_NAME = '3qsdn' IE_NAME = '3qsdn'
IE_DESC = '3Q SDN' IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _VALID_URL = r'(?:https?://playout\.3qsdn\.com/|3qsdn:)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_API_URL = 'http://playout.3qsdn.com/'
_TESTS = [{ _TESTS = [{
# ondemand from http://www.philharmonie.tv/veranstaltung/26/ # ondemand from http://www.philharmonie.tv/veranstaltung/26/
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
@ -21,7 +23,8 @@ class ThreeQSDNIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '0280d6b9-1215-11e6-b427-0cc47a188158', 'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
'ext': 'mp4', 'ext': 'mp4',
'title': '0280d6b9-1215-11e6-b427-0cc47a188158', 'title': '160504_sixpianos',
'description': '160504_sixpianos',
'is_live': False, 'is_live': False,
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'], 'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
@ -38,6 +41,13 @@ class ThreeQSDNIE(InfoExtractor):
'skip_download': True, # m3u8 downloads 'skip_download': True, # m3u8 downloads
}, },
'expected_warnings': ['Failed to download MPD manifest'], 'expected_warnings': ['Failed to download MPD manifest'],
}, {
# playlist
'url': 'http://playout.3qsdn.com/2a70223f-b56f-11e6-a78b-0cc47a188158',
'info_dict': {
'id': '2a70223f-b56f-11e6-a78b-0cc47a188158',
},
'playlist_count': 11,
}, { }, {
# live audio stream # live audio stream
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
@ -69,11 +79,20 @@ class ThreeQSDNIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url, smuggled_data = unsmuggle_url(url, {})
js = self._download_webpage( js = self._download_webpage(
'http://playout.3qsdn.com/%s' % video_id, video_id, self._API_URL + video_id, video_id,
query={'js': 'true'}) query={'js': 'true'})
playout_ids = [m.group('id').replace('\\x2D', '-') for m in re.finditer(
r'sdnPlayoutId\s*:\s*["\'](?P<id>.+?)["\']', js)]
if playout_ids:
if smuggled_data.get('first_video_only'):
return self.url_result(self._API_URL + playout_ids[0], self.ie_key())
return self.playlist_result(
[self.url_result(self._API_URL + vid, self.ie_key()) for vid in playout_ids], video_id)
if any(p in js for p in ( if any(p in js for p in (
'>This content is not available in your country', '>This content is not available in your country',
'playout.3qsdn.com/forbidden')): 'playout.3qsdn.com/forbidden')):
@ -132,11 +151,16 @@ class ThreeQSDNIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
title = self._live_title(video_id) if live else video_id webpage = self._download_webpage(self._API_URL + video_id, video_id)
title = self._live_title(video_id) if live else self._og_search_title(webpage, default=None)
if not title:
title = video_id
description = self._og_search_description(webpage, default=None)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description,
'is_live': live, 'is_live': live,
'formats': formats, 'formats': formats,
} }