diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bef3b82ee..a6ca835ea 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -865,6 +865,7 @@ from .radiocanada import ( RadioCanadaIE, RadioCanadaAudioVideoIE, ) +from .radiocloud import RadioCloudIE from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE diff --git a/youtube_dl/extractor/radiocloud.py b/youtube_dl/extractor/radiocloud.py new file mode 100644 index 000000000..03bf69da3 --- /dev/null +++ b/youtube_dl/extractor/radiocloud.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + +from ..utils import ( + ExtractorError, + extract_attributes, + get_elements_by_class, +) + + +class RadioCloudIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiocloud\.jp/archive/.*?content_id=(?P[0-9]+)' + _TEST = { + 'url': 'https://radiocloud.jp/archive/so?content_id=13165', + 'md5': '6f97afc008931d72c21db1331dbdce94', + 'info_dict': { + 'id': '13165', + 'ext': 'm4a', + 'title': '毒蝮三太夫のミュージックプレゼント:千葉県市川市鬼高『お酒の専門店「酒壱番」』編', + 'description': '1/16(月)放送(11:20~11:40頃)から。訪問先は、千葉県市川市鬼高3-15−5『お酒の専門店「酒壱番」』さん。47年続く国民的中継番組のトーク部分のみをお楽しみください。', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + def get_input_by_name(html, name): + for e in re.finditer(r'', html): + a = extract_attributes(e.group()) + if a.get('name') == name: + return a.get('value') + return None + + element = None + for e in get_elements_by_class('contents_box', webpage): + if get_input_by_name(e, 'content_id') == video_id: + element = e + break + + if not element: + raise ExtractorError('Could not find details of id %s' % video_id) + + file_url = get_input_by_name(element, 'file_url') + if not file_url: + raise ExtractorError('Could not find player URL') + file_url = 'https:' + file_url + + title = get_input_by_name(element, 'title') + if not title: + title = self._html_search_regex(r'(.+?)', element, 'title') + description = self._html_search_regex(r'
(.+?)
', element, + 'description', fatal=False) + + webpage = self._download_webpage(file_url, video_id, + headers={'Referer': url}, + note='Downloading player', + errnote='Unable to download player') + + file_url = self._search_regex(r'var\s+source\s*=\s*"(.+?)"', webpage, 'url') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'url': file_url + }