From 6cbd8de42007532cca3167c6034ff1c243e38408 Mon Sep 17 00:00:00 2001 From: Stefan Date: Thu, 9 Apr 2020 16:42:08 +0200 Subject: [PATCH 01/16] download works, tests fail with error AssertionError: None is not true : Missing mandatory field url --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/playerglobewien.py | 42 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/playerglobewien.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e407ab3d9..23c983a4c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -837,6 +837,7 @@ from .platzi import ( PlatziIE, PlatziCourseIE, ) +from .playerglobewien import PlayerGlobeWienIE from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py new file mode 100644 index 000000000..b5c76e953 --- /dev/null +++ b/youtube_dl/extractor/playerglobewien.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PlayerGlobeWienIE(InfoExtractor): + _VALID_URL = r'https?://player.globe.wien/globe-wien/(?P.*)' + _TEST = { + 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + 'info_dict': { + 'id': 'corona-podcast-teil-4', + 'ext': 'mp4', + 'title': 'Globe Wien VOD - Eckel & Niavarani & Sarsam - Im Endspurt versagt', + 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + }, + 'params': { + 'format': 'bestvideo+bestaudio/best', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + print(video_id) + webpage = self._download_webpage(url, video_id) + formats = [] + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId="+video_id, video_id) + + hls_url = self._parse_json(stream_url, video_id)['streamUrl']['hls'] + + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } + From 29f257fa25c554cfb78c3f5d835feec29fddaf94 Mon Sep 17 00:00:00 2001 From: Stefan Date: Thu, 9 Apr 2020 16:58:54 +0200 Subject: [PATCH 02/16] flake8 fixes, add whitspace around arithmetic operator and remove newlines at the end --- youtube_dl/extractor/playerglobewien.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index b5c76e953..5e0de6ba5 100644 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -26,7 +26,7 @@ class PlayerGlobeWienIE(InfoExtractor): formats = [] title = self._html_search_regex(r'(.+?)', webpage, 'title') - stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId="+video_id, video_id) + stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId=" + video_id, video_id) hls_url = self._parse_json(stream_url, video_id)['streamUrl']['hls'] @@ -39,4 +39,3 @@ class PlayerGlobeWienIE(InfoExtractor): 'title': title, 'formats': formats, } - From f3fbacb6d8bcb62285ec76b02fc9de705d645fe8 Mon Sep 17 00:00:00 2001 From: Stefan Date: Thu, 9 Apr 2020 17:34:18 +0200 Subject: [PATCH 03/16] playerglobewien - split tests for audio and video --- youtube_dl/extractor/playerglobewien.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 5e0de6ba5..fa41d4303 100644 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -6,18 +6,27 @@ from .common import InfoExtractor class PlayerGlobeWienIE(InfoExtractor): _VALID_URL = r'https?://player.globe.wien/globe-wien/(?P.*)' - _TEST = { + _TESTS = [{ 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', 'info_dict': { 'id': 'corona-podcast-teil-4', 'ext': 'mp4', 'title': 'Globe Wien VOD - Eckel & Niavarani & Sarsam - Im Endspurt versagt', - 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', }, 'params': { - 'format': 'bestvideo+bestaudio/best', + 'format': 'bestvideo', } - } + }, { + 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + 'info_dict': { + 'id': 'corona-podcast-teil-4', + 'ext': 'mp4', + 'title': 'Globe Wien VOD - Eckel & Niavarani & Sarsam - Im Endspurt versagt', + }, + 'params': { + 'format': 'bestaudio', + } + }] def _real_extract(self, url): video_id = self._match_id(url) From 09531ab29ef334772f666aa8cd3ec2693a6417ef Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Fri, 10 Apr 2020 10:14:16 +0200 Subject: [PATCH 04/16] playerglobewien - beautify code and remove text in title --- youtube_dl/extractor/playerglobewien.py | 43 ++++++++++++++----------- 1 file changed, 24 insertions(+), 19 deletions(-) mode change 100644 => 100755 youtube_dl/extractor/playerglobewien.py diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py old mode 100644 new mode 100755 index fa41d4303..bb49a989b --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -1,32 +1,36 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor class PlayerGlobeWienIE(InfoExtractor): _VALID_URL = r'https?://player.globe.wien/globe-wien/(?P.*)' - _TESTS = [{ - 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', - 'info_dict': { - 'id': 'corona-podcast-teil-4', - 'ext': 'mp4', - 'title': 'Globe Wien VOD - Eckel & Niavarani & Sarsam - Im Endspurt versagt', + _TESTS = [ + { + 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + 'info_dict': { + 'id': 'corona-podcast-teil-4', + 'ext': 'mp4', + 'title': 'Eckel & Niavarani & Sarsam - Im Endspurt versagt', + }, + 'params': { + 'format': 'bestvideo', + } }, - 'params': { - 'format': 'bestvideo', + { + 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + 'info_dict': { + 'id': 'corona-podcast-teil-4', + 'ext': 'mp4', + 'title': 'Eckel & Niavarani & Sarsam - Im Endspurt versagt', + }, + 'params': { + 'format': 'bestaudio', + } } - }, { - 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', - 'info_dict': { - 'id': 'corona-podcast-teil-4', - 'ext': 'mp4', - 'title': 'Globe Wien VOD - Eckel & Niavarani & Sarsam - Im Endspurt versagt', - }, - 'params': { - 'format': 'bestaudio', - } - }] + ] def _real_extract(self, url): video_id = self._match_id(url) @@ -34,6 +38,7 @@ class PlayerGlobeWienIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] title = self._html_search_regex(r'(.+?)', webpage, 'title') + title = re.sub(r'^Globe Wien VOD -\s*','',title) stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId=" + video_id, video_id) From d4a419700cfd9f28976de1dbe30dc5df16320ab5 Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 10 Apr 2020 21:27:50 +0200 Subject: [PATCH 05/16] playerglobewien - fix flake error with whitespaces --- youtube_dl/extractor/playerglobewien.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index bb49a989b..3f348c810 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -38,7 +38,7 @@ class PlayerGlobeWienIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] title = self._html_search_regex(r'(.+?)', webpage, 'title') - title = re.sub(r'^Globe Wien VOD -\s*','',title) + title = re.sub(r'^Globe Wien VOD -\s*', '', title) stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId=" + video_id, video_id) From efbbe76a637beb4f1d1548cd67806ab9f186be2f Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sat, 11 Apr 2020 20:30:29 +0200 Subject: [PATCH 06/16] PlaywrGlobalWien - add support for hader player --- youtube_dl/extractor/playerglobewien.py | 28 ++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 3f348c810..df614334c 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class PlayerGlobeWienIE(InfoExtractor): - _VALID_URL = r'https?://player.globe.wien/globe-wien/(?P.*)' + _VALID_URL = r'https?://player.(globe.wien|hader.at)/(globe-wien|hader)/(?P.*)' _TESTS = [ { 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', @@ -29,7 +29,29 @@ class PlayerGlobeWienIE(InfoExtractor): 'params': { 'format': 'bestaudio', } - } + }, + { + 'url': 'https://player.hader.at/hader/hader-indien-video', + 'info_dict': { + 'id': 'hader-indien-video', + 'ext': 'mp4', + 'title': 'Film der Woche - Indien', + }, + 'params': { + 'format': 'bestvideo', + } + }, + { + 'url': 'https://player.hader.at/hader/hader-indien-video', + 'info_dict': { + 'id': 'hader-indien-video', + 'ext': 'mp4', + 'title': 'Film der Woche - Indien', + }, + 'params': { + 'format': 'bestaudio', + } + }, ] def _real_extract(self, url): @@ -38,7 +60,7 @@ class PlayerGlobeWienIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] title = self._html_search_regex(r'(.+?)', webpage, 'title') - title = re.sub(r'^Globe Wien VOD -\s*', '', title) + title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId=" + video_id, video_id) From 873b80d0ffdb61df1867a5226b176b45f5f7eed9 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 09:16:29 +0200 Subject: [PATCH 07/16] playerglobewien - extract all links and add audio only support --- youtube_dl/extractor/playerglobewien.py | 33 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index df614334c..91edfdb05 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -23,11 +23,12 @@ class PlayerGlobeWienIE(InfoExtractor): 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', 'info_dict': { 'id': 'corona-podcast-teil-4', - 'ext': 'mp4', + 'ext': 'm4a', 'title': 'Eckel & Niavarani & Sarsam - Im Endspurt versagt', }, 'params': { 'format': 'bestaudio', + 'skip_download': True, } }, { @@ -45,11 +46,20 @@ class PlayerGlobeWienIE(InfoExtractor): 'url': 'https://player.hader.at/hader/hader-indien-video', 'info_dict': { 'id': 'hader-indien-video', - 'ext': 'mp4', + 'ext': 'm4a', 'title': 'Film der Woche - Indien', }, 'params': { 'format': 'bestaudio', + 'skip_download': True, + } + }, + { + 'url': 'https://player.hader.at/hader/hader-indien', + 'info_dict': { + 'id': 'hader-indien', + 'ext': 'mp3', + 'title': 'Hader & Dorfer lesen Indien', } }, ] @@ -62,12 +72,23 @@ class PlayerGlobeWienIE(InfoExtractor): title = self._html_search_regex(r'(.+?)', webpage, 'title') title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) - stream_url = self._download_webpage("https://player.globe.wien/api/playout?vodId=" + video_id, video_id) + streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + video_id, + video_id).get('streamUrl') - hls_url = self._parse_json(stream_url, video_id)['streamUrl']['hls'] + if streamurl.get('hls'): + formats.extend(self._extract_m3u8_formats( + streamurl.get('hls'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) - formats.extend(self._extract_m3u8_formats( - hls_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + if streamurl.get('dash'): + formats.extend(self._extract_mpd_formats( + streamurl.get('dash'), video_id, mpd_id='dash', fatal=False)) + + if streamurl.get('audio'): + return { + 'id': video_id, + 'title': title, + 'url': streamurl.get('audio'), + } self._sort_formats(formats) return { From c3d2a27c8d5238d2dc117114bbb28645a04e10d3 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 10:03:03 +0200 Subject: [PATCH 08/16] playerglobewien - fix regex to according coding convention --- youtube_dl/extractor/playerglobewien.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 91edfdb05..5733f0a8d 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class PlayerGlobeWienIE(InfoExtractor): - _VALID_URL = r'https?://player.(globe.wien|hader.at)/(globe-wien|hader)/(?P.*)' + _VALID_URL = r'https?://player.(?:globe.wien|hader.at)/(?:globe-wien|hader)/(?P.*)' _TESTS = [ { 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', @@ -69,7 +69,7 @@ class PlayerGlobeWienIE(InfoExtractor): print(video_id) webpage = self._download_webpage(url, video_id) formats = [] - title = self._html_search_regex(r'(.+?)', webpage, 'title') + title = self._html_search_regex(r'(?P<title>.+?)', webpage, 'title', group='title') title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + video_id, From a734819a6c3d25e4b20aae848f1d0a46d4994893 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 10:14:26 +0200 Subject: [PATCH 09/16] playerglobewien - simplify title extractor --- youtube_dl/extractor/playerglobewien.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 5733f0a8d..2ca930e4c 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -69,7 +69,7 @@ class PlayerGlobeWienIE(InfoExtractor): print(video_id) webpage = self._download_webpage(url, video_id) formats = [] - title = self._html_search_regex(r'(?P<title>.+?)', webpage, 'title', group='title') + title = self._og_search_title(webpage) title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + video_id, From bd7e0caa13859c04d9a6c50679c8d5f0a7eab27b Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 10:57:36 +0200 Subject: [PATCH 10/16] playerglobewien - add audio only to formats to prevent to returns --- youtube_dl/extractor/playerglobewien.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 2ca930e4c..bf622d331 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -65,34 +65,33 @@ class PlayerGlobeWienIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) - print(video_id) - webpage = self._download_webpage(url, video_id) + format_id = self._match_id(url) + webpage = self._download_webpage(url, format_id) formats = [] title = self._og_search_title(webpage) title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) - streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + video_id, - video_id).get('streamUrl') + streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + format_id, + format_id).get('streamUrl') if streamurl.get('hls'): formats.extend(self._extract_m3u8_formats( - streamurl.get('hls'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + streamurl.get('hls'), format_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) if streamurl.get('dash'): formats.extend(self._extract_mpd_formats( - streamurl.get('dash'), video_id, mpd_id='dash', fatal=False)) + streamurl.get('dash'), format_id, mpd_id='dash', fatal=False)) if streamurl.get('audio'): - return { - 'id': video_id, - 'title': title, + formats.append({ 'url': streamurl.get('audio'), - } + 'format_id': format_id, + 'vcodec': 'none', + }) self._sort_formats(formats) return { - 'id': video_id, + 'id': format_id, 'title': title, 'formats': formats, } From 0b991b67f76332f5c1b1440ddc563f25ec1b2568 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 12:41:15 +0200 Subject: [PATCH 11/16] playerglobewien - add description and thumbnail extractor --- youtube_dl/extractor/playerglobewien.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index bf622d331..4b7d9b4c4 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -10,10 +10,13 @@ class PlayerGlobeWienIE(InfoExtractor): _TESTS = [ { 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', + 'md5': 'f973a27e258bdeff686e63434e872f70', 'info_dict': { 'id': 'corona-podcast-teil-4', 'ext': 'mp4', 'title': 'Eckel & Niavarani & Sarsam - Im Endspurt versagt', + 'description': 'md5:fbd2e2a456fef3a171683dd9e33f1810', + 'thumbnail': r're:^https?://.*\.jpg', }, 'params': { 'format': 'bestvideo', @@ -67,6 +70,8 @@ class PlayerGlobeWienIE(InfoExtractor): def _real_extract(self, url): format_id = self._match_id(url) webpage = self._download_webpage(url, format_id) + thumbnail = self._html_search_regex(r' Date: Sun, 12 Apr 2020 13:32:00 +0200 Subject: [PATCH 12/16] playerglobeqien - added description, thumbnail and md5 checks --- youtube_dl/extractor/playerglobewien.py | 31 +++++++------------------ 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 4b7d9b4c4..11bf2d97f 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -20,49 +20,36 @@ class PlayerGlobeWienIE(InfoExtractor): }, 'params': { 'format': 'bestvideo', - } - }, - { - 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', - 'info_dict': { - 'id': 'corona-podcast-teil-4', - 'ext': 'm4a', - 'title': 'Eckel & Niavarani & Sarsam - Im Endspurt versagt', - }, - 'params': { - 'format': 'bestaudio', 'skip_download': True, } }, { 'url': 'https://player.hader.at/hader/hader-indien-video', + 'md5': '0bca8d5b309361a9556cee6abff2c1b9', 'info_dict': { 'id': 'hader-indien-video', 'ext': 'mp4', 'title': 'Film der Woche - Indien', + 'description': 'md5:cad9f2bd7a0c5c0dff9cf1cff71288f6', + 'thumbnail': r're:^https?://.*\.jpg', }, 'params': { 'format': 'bestvideo', - } - }, - { - 'url': 'https://player.hader.at/hader/hader-indien-video', - 'info_dict': { - 'id': 'hader-indien-video', - 'ext': 'm4a', - 'title': 'Film der Woche - Indien', - }, - 'params': { - 'format': 'bestaudio', 'skip_download': True, } }, { 'url': 'https://player.hader.at/hader/hader-indien', + 'md5': 'b8bd7cf37d82529411a6e67005739fb3', 'info_dict': { 'id': 'hader-indien', 'ext': 'mp3', 'title': 'Hader & Dorfer lesen Indien', + 'description': 'md5:8b4e1de6c627b7d9ee6cb1c65debfa85', + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': True, } }, ] From 59bf7a1f26dd9a7f3a1baa70b301669c89296d2f Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 12 Apr 2020 13:47:41 +0200 Subject: [PATCH 13/16] playerglobeqien - add fallback to thumbnail extractor --- youtube_dl/extractor/playerglobewien.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 11bf2d97f..09c43c0b7 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -57,7 +57,9 @@ class PlayerGlobeWienIE(InfoExtractor): def _real_extract(self, url): format_id = self._match_id(url) webpage = self._download_webpage(url, format_id) - thumbnail = self._html_search_regex(r' Date: Fri, 17 Apr 2020 15:13:21 +0200 Subject: [PATCH 14/16] [playerglobewien] - add extraction from seehas, add extraction of all thumbnails --- youtube_dl/extractor/playerglobewien.py | 67 ++++++++++++++----------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 09c43c0b7..b2263e38c 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class PlayerGlobeWienIE(InfoExtractor): - _VALID_URL = r'https?://player.(?:globe.wien|hader.at)/(?:globe-wien|hader)/(?P.*)' + _VALID_URL = r'https?://player\.(hader\.at|globe\.wien)/[^/]+/(?P[^/?#]+)' _TESTS = [ { 'url': 'https://player.globe.wien/globe-wien/corona-podcast-teil-4', @@ -55,39 +55,46 @@ class PlayerGlobeWienIE(InfoExtractor): ] def _real_extract(self, url): - format_id = self._match_id(url) - webpage = self._download_webpage(url, format_id) - thumbnail = self._html_search_regex( - r']+id="__NEXT_DATA__"[^>]+type="application/json"[^>]*>([^<]+)', + webpage, 'next data'), + video_id) + + vod = next_data.get('props').get('initialState').get('vod') + formats = [] - title = self._og_search_title(webpage) - title = re.sub(r'^(Globe Wien VOD -|Hader VOD -)\s*', '', title) + for key in vod.get('streamUrl'): + src_url = vod.get('streamUrl').get(key) + if key == 'hls': + formats.extend(self._extract_m3u8_formats( + src_url, video_id, ext='mp4', m3u8_id=key, fatal=False)) + elif key == 'dash': + formats.extend(self._extract_mpd_formats( + src_url, video_id, mpd_id=key, fatal=False)) + else: + formats.append({ + 'format_id': key, + 'url': src_url + }) - streamurl = self._download_json("https://player.globe.wien/api/playout?vodId=" + format_id, - format_id).get('streamUrl') - - if streamurl.get('hls'): - formats.extend(self._extract_m3u8_formats( - streamurl.get('hls'), format_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) - - if streamurl.get('dash'): - formats.extend(self._extract_mpd_formats( - streamurl.get('dash'), format_id, mpd_id='dash', fatal=False)) - - if streamurl.get('audio'): - formats.append({ - 'url': streamurl.get('audio'), - 'format_id': format_id, - 'vcodec': 'none', + thumbnails = [] + for key in vod.get('images'): + thumbnails.append({ + 'id': key, + 'url': vod.get('images').get(key), }) - + self._check_formats(formats, video_id) self._sort_formats(formats) + return { - 'id': format_id, - 'title': title, - 'thumbnail': thumbnail, - 'description': description, + 'id': vod.get('id'), + 'title': vod.get('title'), + 'description': vod.get('teaserDescription'), + 'release_year': vod.get('year'), + 'duration': (vod.get('durationMinutes') or 0) * 60, 'formats': formats, + 'thumbnails': thumbnails, } From 9ed1d089d19fa1317fdd09c43026a526d2157361 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Fri, 17 Apr 2020 16:42:46 +0200 Subject: [PATCH 15/16] [playerglobewien] - removed unused import --- youtube_dl/extractor/playerglobewien.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index b2263e38c..259aa4ae2 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor From 614c2697b556edb5420a30af4528668f7f5b8638 Mon Sep 17 00:00:00 2001 From: Oneboy1979 Date: Sun, 19 Apr 2020 10:08:49 +0200 Subject: [PATCH 16/16] [playerglobewien] - improvement url extractor (thx to sehaas) --- youtube_dl/extractor/playerglobewien.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/playerglobewien.py b/youtube_dl/extractor/playerglobewien.py index 259aa4ae2..116b2f169 100755 --- a/youtube_dl/extractor/playerglobewien.py +++ b/youtube_dl/extractor/playerglobewien.py @@ -65,8 +65,7 @@ class PlayerGlobeWienIE(InfoExtractor): vod = next_data.get('props').get('initialState').get('vod') formats = [] - for key in vod.get('streamUrl'): - src_url = vod.get('streamUrl').get(key) + for key, src_url in vod.get('streamUrl').items(): if key == 'hls': formats.extend(self._extract_m3u8_formats( src_url, video_id, ext='mp4', m3u8_id=key, fatal=False)) @@ -80,10 +79,10 @@ class PlayerGlobeWienIE(InfoExtractor): }) thumbnails = [] - for key in vod.get('images'): + for key, src_url in vod.get('images').items(): thumbnails.append({ 'id': key, - 'url': vod.get('images').get(key), + 'url': src_url, }) self._check_formats(formats, video_id) self._sort_formats(formats)