From 5a3e2d571d8d9676eb337af7c6a7683981ee77e5 Mon Sep 17 00:00:00 2001 From: mrx23dot Date: Sat, 2 May 2020 14:18:23 +0100 Subject: [PATCH 1/5] [mediaklikk] initial --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mediaklikk.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/mediaklikk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..0c9fcec0a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -607,6 +607,7 @@ from .markiza import ( from .massengeschmacktv import MassengeschmackTVIE from .matchtv import MatchTVIE from .mdr import MDRIE +from .mediaklikk import MediaKlikkIE from .mediaset import MediasetIE from .mediasite import MediasiteIE from .medici import MediciIE diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py new file mode 100644 index 000000000..b049c7259 --- /dev/null +++ b/youtube_dl/extractor/mediaklikk.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_str +) + + +class MediaKlikkIE(InfoExtractor): + # (?P...) used for referencing match as 'id' + _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?P:[^/]+)/?' 
+ _TEST = { + 'url': 'https://mediaklikk.hu/video/kiberma-2020-04-30-i-adas/', + 'info_dict': { + 'id': '2512015', + 'ext': 'mp4', + 'title': 'KiberMa, 2020.04.30-i adás | MédiaKlikk', + # no thumbnail extractable + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + display_id = video_id + webpage = self._download_webpage(url, video_id) + + pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);" + info_json = self._html_search_regex(pattern, webpage, 'info_json') + info_meta = self._parse_json(compat_urllib_parse_unquote(info_json),None) + + info_ret = { + '_type': 'video', + 'title': info_meta.get('title') or self._og_search_title(webpage), + 'ext': 'mp4', + 'display_id': display_id, + 'id': video_id + } + + if 'series' in info_meta: + info_ret['series'] = info_meta['series'] + info_meta['video'] = info_meta['token'] + del info_meta['token'] + playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', + video_id, + query=info_meta) + pattern = r"\"file\": \"(\\/\\/.*playlist\.m3u8)\"," + playlist_url = 'https:' + compat_urllib_parse_unquote( + self._html_search_regex(pattern, playerpage, 'playlist_url'))\ + .replace('\\/', '/') + formats = self._extract_wowza_formats( + playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash']) + self._sort_formats(formats) + info_ret['formats'] = formats + + return info_ret From 074292541fb35816182cca2f0508d61793c28ed4 Mon Sep 17 00:00:00 2001 From: mrx23dot Date: Sat, 2 May 2020 15:01:21 +0100 Subject: [PATCH 2/5] [mediaklikk] Add new extractor --- youtube_dl/extractor/mediaklikk.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index b049c7259..ad52affbd 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -2,28 +2,27 @@ from __future__ import unicode_literals from .common import 
InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_str -) - +from ..compat import compat_urllib_parse_unquote +import re class MediaKlikkIE(InfoExtractor): - # (?P...) used for referencing match as 'id' - _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?P:[^/]+)/?' + # Named regular expression group: (?P...) used for referencing match as 'id' + _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?P[^/]+)/?' + _TEST = { 'url': 'https://mediaklikk.hu/video/kiberma-2020-04-30-i-adas/', 'info_dict': { - 'id': '2512015', + 'id': 'kiberma-2020-04-30-i-adas', 'ext': 'mp4', 'title': 'KiberMa, 2020.04.30-i adás | MédiaKlikk', - # no thumbnail extractable + # no thumbnail extractable } } def _real_extract(self, url): - video_id = self._match_id(url) - display_id = video_id + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = video_id # we only have one id in url.. webpage = self._download_webpage(url, video_id) pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);" @@ -32,7 +31,7 @@ class MediaKlikkIE(InfoExtractor): info_ret = { '_type': 'video', - 'title': info_meta.get('title') or self._og_search_title(webpage), + 'title': info_meta.get('title') or video_id or self._og_search_title(webpage), 'ext': 'mp4', 'display_id': display_id, 'id': video_id @@ -42,9 +41,7 @@ class MediaKlikkIE(InfoExtractor): info_ret['series'] = info_meta['series'] info_meta['video'] = info_meta['token'] del info_meta['token'] - playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', - video_id, - query=info_meta) + playerpage = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=info_meta) pattern = r"\"file\": \"(\\/\\/.*playlist\.m3u8)\"," playlist_url = 'https:' + compat_urllib_parse_unquote( self._html_search_regex(pattern, playerpage, 'playlist_url'))\ From b66a8c363e8e8e6d49c73b4f03da37cc3d9f41eb Mon Sep 17 00:00:00 
2001 From: mrx23dot Date: Sat, 2 May 2020 15:31:49 +0100 Subject: [PATCH 3/5] [mediaklikk] coding convention fix --- youtube_dl/extractor/mediaklikk.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index ad52affbd..226a950e5 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote import re + class MediaKlikkIE(InfoExtractor): # Named regular expression group: (?P...) used for referencing match as 'id' _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?P[^/]+)/?' @@ -12,22 +13,22 @@ class MediaKlikkIE(InfoExtractor): _TEST = { 'url': 'https://mediaklikk.hu/video/kiberma-2020-04-30-i-adas/', 'info_dict': { - 'id': 'kiberma-2020-04-30-i-adas', - 'ext': 'mp4', - 'title': 'KiberMa, 2020.04.30-i adás | MédiaKlikk', - # no thumbnail extractable + 'id': 'kiberma-2020-04-30-i-adas', + 'ext': 'mp4', + 'title': 'KiberMa, 2020.04.30-i adás | MédiaKlikk', + # no thumbnail extractable } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = video_id # we only have one id in url.. + display_id = video_id # we only have one id in url.. 
webpage = self._download_webpage(url, video_id) pattern = r"mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);" info_json = self._html_search_regex(pattern, webpage, 'info_json') - info_meta = self._parse_json(compat_urllib_parse_unquote(info_json),None) + info_meta = self._parse_json(compat_urllib_parse_unquote(info_json), None) info_ret = { '_type': 'video', From f15317c8d17e2b2f06549ba12558afddcf9f63aa Mon Sep 17 00:00:00 2001 From: mrx23dot Date: Tue, 14 Jul 2020 21:47:26 +0100 Subject: [PATCH 4/5] [mediaklikk] added m4sport support --- youtube_dl/extractor/mediaklikk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index 226a950e5..378a33176 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -8,7 +8,7 @@ import re class MediaKlikkIE(InfoExtractor): # Named regular expression group: (?P<id>...) used for referencing match as 'id' - _VALID_URL = r'https?://(?:www\.)?mediaklikk\.hu/video/(?P<id>[^/]+)/?' + _VALID_URL = r'https?://(?:www\.)?(?:mediaklikk|m4sport)\.hu/videok?/(?P<id>[^/]+)/?' _TEST = { 'url': 'https://mediaklikk.hu/video/kiberma-2020-04-30-i-adas/', From ad43251136227dc145a0e993f5bf7970423d8e1a Mon Sep 17 00:00:00 2001 From: mrx23dot Date: Mon, 21 Sep 2020 20:36:33 +0100 Subject: [PATCH 5/5] [mediaklikk] updated test site URL, added subsite support (hirado) --- youtube_dl/extractor/mediaklikk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mediaklikk.py b/youtube_dl/extractor/mediaklikk.py index 378a33176..eb0307765 100644 --- a/youtube_dl/extractor/mediaklikk.py +++ b/youtube_dl/extractor/mediaklikk.py @@ -8,10 +8,10 @@ import re class MediaKlikkIE(InfoExtractor): # Named regular expression group: (?P<id>...) used for referencing match as 'id' - _VALID_URL = r'https?://(?:www\.)?(?:mediaklikk|m4sport)\.hu/videok?/(?P<id>[^/]+)/?' 
+ _VALID_URL = r'https?:\/\/(?:www\.)?(?:mediaklikk|m4sport|hirado)\.hu\/.*?videok?\/(?P<id>[^\/]+)\/?' _TEST = { - 'url': 'https://mediaklikk.hu/video/kiberma-2020-04-30-i-adas/', + 'url': 'https://mediaklikk.hu/adal2020/video/2020/03/07/a-dal-donto/', 'info_dict': { 'id': 'kiberma-2020-04-30-i-adas', 'ext': 'mp4',