From a017c0c469dd04980c07d822c8d59b814e6cd0e2 Mon Sep 17 00:00:00 2001 From: Jan Gampe Date: Wed, 16 Jan 2019 16:00:33 +0100 Subject: [PATCH] Add Podlove Publisher Extractor, create classes --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 5 ++ youtube_dl/extractor/podlovepublisher.py | 59 ++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/podlovepublisher.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de38c6641..9d800fca2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -867,6 +867,7 @@ from .pluralsight import ( PluralsightIE, PluralsightCourseIE, ) +from .podlovepublisher import PodlovePublisherIE from .podomatic import PodomaticIE from .pokemon import PokemonIE from .polskieradio import ( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 067de28cd..149ceb0a4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -116,6 +116,7 @@ from .foxnews import FoxNewsIE from .viqeo import ViqeoIE from .expressen import ExpressenIE from .zype import ZypeIE +from .podlovepublisher import PodlovePublisherIE class GenericIE(InfoExtractor): @@ -3124,6 +3125,10 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( apa_urls, video_id, video_title, ie=APAIE.ie_key()) + podlove_url = PodlovePublisherIE._extract_url(webpage) + if podlove_url: + return self.url_result(podlove_url) + foxnews_urls = FoxNewsIE._extract_urls(webpage) if foxnews_urls: return self.playlist_from_matches( diff --git a/youtube_dl/extractor/podlovepublisher.py b/youtube_dl/extractor/podlovepublisher.py new file mode 100644 index 000000000..26d157936 --- /dev/null +++ b/youtube_dl/extractor/podlovepublisher.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor + +import datetime +import time +import re + 
class PodlovePublisherIE(InfoExtractor):
    """Extractor for Podlove Publisher web player (pwp4) configuration URLs.

    The URL handled here (``...?podlove_action=pwp4_config``) is expected to
    return a JSON document; this extractor reads its ``title``, ``summary``,
    ``duration`` and ``audio`` entries.
    """

    # The character class keeps the match inside a single URL token: when this
    # pattern is searched for in a whole web page (see _extract_url) a bare
    # ``.+?`` could start at any earlier ``//`` on the same line and drag
    # unrelated markup into the result.  The previous ``/?`` was an unescaped
    # (i.e. "optional slash") quantifier, not a literal question mark, so it
    # is dropped; the query separator is consumed by the character class.
    _VALID_URL = r'''(?:https?:)?//[^\s"'<>]+?podlove_action=pwp4_config'''

    _TEST = {
        'url': 'https://not-safe-for-work.de/nsfw099-kanzlerkind-sebastian/?podlove_action=pwp4_config',
        'md5': '73ab53f3898e752f6db89b50c3b4658c',
        'info_dict': {
            'id': 'NSFW099 Kanzlerkind Sebastian',
            'ext': 'm4a',
            'title': 'NSFW099 Kanzlerkind Sebastian',
            'description': 'Uuuuund da sind wir wieder, keine 10 Monate nachdem wir das letzte Mal gesendet haben. Und bedenkt, dass solche Sendezyklen im Kern gut für Euch sind. So oder so haben wir uns einiges zu erzählen, auch wenn wir zunehmend aus der alten Brachialität rauszuwachsen scheinen. Dafür mehr Blick in die Zeit und dann und wann auch ins Internet.',
            'duration': 11723,
        },
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first Podlove config URL found in *webpage*, or None.

        Uses the same pattern as ``_VALID_URL`` so that anything returned
        here is also accepted by this extractor.
        """
        mobj = re.search(PodlovePublisherIE._VALID_URL, webpage)
        return mobj.group(0) if mobj else None

    @staticmethod
    def _parse_duration(value):
        """Convert a ``[[HH:]MM:]SS[.fff]`` string to seconds.

        The fractional part is discarded (as the original implementation
        did) and the result is returned as a float.  Returns None when
        *value* is missing or not in the expected format, since the duration
        is optional metadata and must not abort the extraction.
        """
        if not value:
            return None
        try:
            whole = value.split('.')[0]
            parts = [int(part) for part in whole.split(':')]
        except (AttributeError, ValueError):
            return None
        if len(parts) > 3:
            return None
        seconds = 0
        # Fold left-to-right so that hour counts above 23 (which
        # time.strptime('%H') rejects) are handled as well.
        for part in parts:
            seconds = seconds * 60 + part
        return float(seconds)

    def _real_extract(self, url):
        """Download the player configuration JSON and build the info dict.

        Raises KeyError/IndexError when the mandatory ``title`` or the first
        ``audio`` entry's ``url`` is absent; optional fields that are missing
        or malformed are reported as None instead.
        """
        # _VALID_URL accepts protocol-relative URLs; give them a scheme so
        # the download request is well-formed.
        if url.startswith('//'):
            url = 'https:' + url

        player_data = self._download_json(url, None)

        title = player_data['title']
        audio = player_data['audio'][0]

        try:
            filesize = int(audio.get('size'))
        except (TypeError, ValueError):
            filesize = None

        return {
            'id': title,
            'title': title,
            'description': player_data.get('summary'),
            'filesize': filesize,
            'url': audio['url'],
            'duration': self._parse_duration(player_data.get('duration')),
        }