youtube-dl/youtube_dl/extractor/orf.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    determine_ext,
    float_or_none,
    HEADRequest,
    int_or_none,
    orderedSet,
    remove_end,
    strip_jsonp,
    unescapeHTML,
    unified_strdate,
    url_or_none,
)


class ORFTVthekIE(InfoExtractor):
    """Extractor for ORF TVthek pages (tvthek.orf.at).

    The page carries a JSON playlist inside the ``data-jsb`` attribute of its
    ``VideoPlaylist`` element; each video of that playlist that has both an
    id and a title becomes one entry of the returned playlist.
    """

    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
        'playlist': [{
            'md5': '2942210346ed779588f428a92db88712',
            'info_dict': {
                'id': '8896777',
                'ext': 'mp4',
                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
                'description': 'md5:c1272f0245537812d4e36419c207b67d',
                'duration': 2668,
                'upload_date': '20141208',
            },
        }],
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
        'info_dict': {
            'id': '7982259',
            'ext': 'mp4',
            'title': 'Best of Ingrid Thurnher',
            'upload_date': '20140527',
            'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
        },
        'params': {
            'skip_download': True,  # rtsp downloads
        },
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
        'only_matching': True,
    }, {
        'url': 'http://tvthek.orf.at/profile/Universum/35429',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the JSON playlist embedded in the page.

        Returns a ``playlist`` result whose id is the numeric id matched from
        the URL. Videos lacking an id or a title are skipped.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The attribute value is HTML-escaped JSON, hence unescapeHTML.
        data_jsb = self._parse_json(
            self._search_regex(
                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
                webpage, 'playlist', group='json'),
            playlist_id, transform_source=unescapeHTML)['playlist']['videos']

        entries = []
        for sd in data_jsb:
            video_id, title = sd.get('id'), sd.get('title')
            if not video_id or not title:
                continue
            video_id = compat_str(video_id)

            formats = []
            for fd in sd['sources']:
                src = url_or_none(fd.get('src'))
                if not src:
                    continue
                format_id_list = []
                for key in ('delivery', 'quality', 'quality_string'):
                    value = fd.get(key)
                    if value:
                        format_id_list.append(value)
                format_id = '-'.join(format_id_list)
                # Always work with the validated URL, and do not let a single
                # unavailable manifest abort the whole playlist: the other
                # sources may still be usable.
                ext = determine_ext(src)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        src, video_id, 'mp4', m3u8_id=format_id,
                        fatal=False))
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        src, video_id, f4m_id=format_id, fatal=False))
                else:
                    formats.append({
                        'format_id': format_id,
                        'url': src,
                        'protocol': fd.get('protocol'),
                    })

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
                # Probe the first progressive HTTP(S) mp4 URL, if there is
                # one; a failure is only reported, never fatal.
                http_url = next(
                    (f['url'] for f in formats
                     if re.match(r'^https?://.*\.mp4$', f['url'])),
                    None)
                if http_url:
                    self._request_webpage(
                        HEADRequest(http_url), video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._check_formats(formats, video_id)
            self._sort_formats(formats)

            subtitles = {}
            # 'subtitles' may be missing or null in the playlist JSON.
            for sub in sd.get('subtitles') or []:
                sub_src = sub.get('src')
                if not sub_src:
                    continue
                subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
                    'url': sub_src,
                })

            upload_date = unified_strdate(sd.get('created_date'))
            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'subtitles': subtitles,
                'description': sd.get('description'),
                'duration': int_or_none(sd.get('duration_in_seconds')),
                'upload_date': upload_date,
                'thumbnail': sd.get('image_full_url'),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }


class ORFRadioIE(InfoExtractor):
    """Shared implementation for the ORF radio "player" extractors.

    This class defines no ``_VALID_URL`` of its own; ``_real_extract`` relies
    on the pattern providing the named groups ``station``, ``date`` and
    ``show``.
    """

    def _real_extract(self, url):
        """Return a playlist with one mp3 entry per stream of the broadcast."""
        mobj = re.match(self._VALID_URL, url)
        station = mobj.group('station')
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        if station == 'fm4':
            show_id = '4%s' % show_id

        data = self._download_json(
            'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
            show_id
        )

        def extract_entry_dict(info, title, subtitle):
            # 'start' and 'end' are divided by 1000 to obtain seconds.
            # int_or_none() does the scaling with integer arithmetic, so the
            # values are whole numbers on Python 2 and Python 3 alike (a
            # plain "/" would yield floats on Python 3).
            return {
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
                'title': title,
                'description': subtitle,
                'duration': int_or_none(info['end'] - info['start'], 1000),
                'timestamp': int_or_none(info['start'], 1000),
                'ext': 'mp3'
            }

        title = data['title']
        # The subtitle only feeds the optional description, so its absence
        # must not abort extraction.
        subtitle = data.get('subtitle')

        entries = [extract_entry_dict(t, title, subtitle) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'description': subtitle,
            'entries': entries
        }


class ORFFM4IE(ORFRadioIE):
    """FM4 radio player extractor; extraction logic is inherited from ORFRadioIE."""

    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    # The named groups station/date/show are read by ORFRadioIE._real_extract.
    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    # Skipped test: per the skip message, the shows are only available for 7 days.
    _TEST = {
        'url': 'http://fm4.orf.at/player/20170107/CC',
        'md5': '2b0be47375432a7ef104453432a19212',
        'info_dict': {
            'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
            'ext': 'mp3',
            'title': 'Solid Steel Radioshow',
            'description': 'Die Mixshow von Coldcut und Ninja Tune.',
            'duration': 3599,
            'timestamp': 1483819257,
            'upload_date': '20170107',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.'
    }


class ORFOE1IE(ORFRadioIE):
    """OE1 radio player extractor; extraction logic is inherited from ORFRadioIE."""

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    # The named groups station/date/show are read by ORFRadioIE._real_extract.
    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    # Skipped test: per the skip message, the shows are only available for 7 days.
    _TEST = {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.'
    }


class ORFIPTVIE(InfoExtractor):
    """Extractor for the video embedded in an iptv.orf.at story page."""

    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Resolve the story's video id and collect its formats and metadata.

        The story page yields the video id, title, description and upload
        date; the bits.orf.at JSON yields duration, thumbnail and the load
        balancer URL whose ``redirect`` mapping lists the available streams.
        """
        story_id = self._match_id(url)

        story_url = 'http://iptv.orf.at/stories/%s' % story_id
        webpage = self._download_webpage(story_url, story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        # The API answers with a list; only its first element is used.
        metadata_url = (
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s'
            % video_id)
        data = self._download_json(metadata_url, video_id)[0]

        duration = float_or_none(data['duration'], 1000)

        video = data['sources']['default']
        load_balancer_url = video['loadBalancerUrl']
        # Properties of the 'default' source, applied to the rtmp format only.
        base_format = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }
        thumbnail = video.get('preview')

        # The load balancer response is JSONP, hence strip_jsonp.
        rendition = self._download_json(
            load_balancer_url, video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                rtmp_format = dict(base_format)
                rtmp_format['url'] = format_url
                rtmp_format['format_id'] = format_id
                formats.append(rtmp_format)
                continue
            # Anything that is neither rtmp nor a known manifest is ignored.
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': remove_end(
                self._og_search_title(webpage), ' - iptv.ORF.at'),
            'description': self._og_search_description(webpage),
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': unified_strdate(self._html_search_meta(
                'dc.date', webpage, 'upload date')),
            'formats': formats,
        }


class ORFFM4StoryIE(InfoExtractor):
    """Extractor for the videos embedded in an fm4.orf.at story page."""

    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per distinct video id on the page.

        All entries share the page's title, description and upload date; the
        second and later entries get a " (N)" suffix so titles stay unique.
        """
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        # Distinct video ids in order of first appearance.
        video_ids = orderedSet(
            re.findall(r'data-video(?:id)?="(\d+)"', webpage))

        entries = []
        for position, video_id in enumerate(video_ids, 1):
            # The API answers with a list; only its first element is used.
            metadata_url = (
                'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s'
                % video_id)
            data = self._download_json(metadata_url, video_id)[0]

            duration = float_or_none(data['duration'], 1000)

            video = data['sources']['q8c']
            load_balancer_url = video['loadBalancerUrl']
            # Properties of the 'q8c' source, applied to the rtmp format only.
            base_format = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }
            thumbnail = video.get('preview')

            # The load balancer response is JSONP, hence strip_jsonp.
            rendition = self._download_json(
                load_balancer_url, video_id, transform_source=strip_jsonp)

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    rtmp_format = dict(base_format)
                    rtmp_format['url'] = format_url
                    rtmp_format['format_id'] = format_id
                    formats.append(rtmp_format)
                    continue
                # Anything that is neither rtmp nor a known manifest is
                # ignored.
                ext = determine_ext(format_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))
            self._sort_formats(formats)

            title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
            if position > 1:
                # Titles are duplicates, make them unique
                title = '%s (%d)' % (title, position)

            entries.append({
                'id': video_id,
                'title': title,
                'description': self._og_search_description(webpage),
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': unified_strdate(self._html_search_meta(
                    'dc.date', webpage, 'upload date')),
                'formats': formats,
            })

        return self.playlist_result(entries)
Fix orf.at extractor by adding file coding mark 2013-08-29 19:51:38 +02:00			`# coding: utf-8`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`from __future__ import unicode_literals`
Fix orf.at extractor by adding file coding mark 2013-08-29 19:51:38 +02:00
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`import re`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00
			`from .common import InfoExtractor`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`from ..compat import compat_str`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00			`from ..utils import (`
[orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00			`determine_ext,`
			`float_or_none,`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`HEADRequest,`
[orf:iptv] Add extractor (Closes #5140) 2015-03-07 12:31:03 +01:00			`int_or_none,`
[orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00			`orderedSet,`
[orf:iptv] Add extractor (Closes #5140) 2015-03-07 12:31:03 +01:00			`remove_end,`
[orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00			`strip_jsonp,`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`unescapeHTML,`
[orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00			`unified_strdate,`
[orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) 2018-11-02 17:46:56 +01:00			`url_or_none,`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00			`)`

[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`class ORFTVthekIE(InfoExtractor):`
			`IE_NAME = 'orf:tvthek'`
			`IE_DESC = 'ORF TVthek'`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`_VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00
[orf:tvthek] Add support for topic URLs (Fixes #4474) 2014-12-16 16:45:28 +01:00			`_TESTS = [{`
[orf] Modernize 2014-12-13 12:41:31 +01:00			`'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',`
			`'playlist': [{`
			`'md5': '2942210346ed779588f428a92db88712',`
			`'info_dict': {`
			`'id': '8896777',`
			`'ext': 'mp4',`
			`'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',`
			`'description': 'md5:c1272f0245537812d4e36419c207b67d',`
			`'duration': 2668,`
			`'upload_date': '20141208',`
			`},`
			`}],`
[orf:tvthek] Add support for topic URLs (Fixes #4474) 2014-12-16 16:45:28 +01:00			`'skip': 'Blocked outside of Austria / Germany',`
			`}, {`
			`'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',`
Fix unit tests for m3u8 and RTSP extractors that require ffmpeg or mplayer 2016-07-07 23:39:39 +02:00			`'info_dict': {`
			`'id': '7982259',`
			`'ext': 'mp4',`
			`'title': 'Best of Ingrid Thurnher',`
			`'upload_date': '20140527',`
			`'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',`
			`},`
			`'params': {`
			`'skip_download': True, # rtsp downloads`
			`},`
Fix some only matching tests (closes #14855) 2017-11-26 14:53:10 +01:00			`'skip': 'Blocked outside of Austria / Germany',`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`}, {`
			`'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',`
Fix some only matching tests (closes #14855) 2017-11-26 14:53:10 +01:00			`'only_matching': True,`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`}, {`
			`'url': 'http://tvthek.orf.at/profile/Universum/35429',`
Fix some only matching tests (closes #14855) 2017-11-26 14:53:10 +01:00			`'only_matching': True,`
[orf:tvthek] Add support for topic URLs (Fixes #4474) 2014-12-16 16:45:28 +01:00			`}]`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00
			`def _real_extract(self, url):`
[orf] Modernize 2014-12-13 12:41:31 +01:00			`playlist_id = self._match_id(url)`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00			`webpage = self._download_webpage(url, playlist_id)`

[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`data_jsb = self._parse_json(`
			`self._search_regex(`
			`r'<div[^>]+class=(["\']).?VideoPlaylist.?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',`
			`webpage, 'playlist', group='json'),`
			`playlist_id, transform_source=unescapeHTML)['playlist']['videos']`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00
			`entries = []`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`for sd in data_jsb:`
			`video_id, title = sd.get('id'), sd.get('title')`
			`if not video_id or not title:`
			`continue`
			`video_id = compat_str(video_id)`
[orf:tvthek] Fix extraction (closes #17737) use _extract_m3u8_formats and _extract_f4m_formats helper functions closes #17737 2018-10-30 23:44:50 +01:00			`formats = []`
			`for fd in sd['sources']:`
[orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) 2018-11-02 17:46:56 +01:00			`src = url_or_none(fd.get('src'))`
			`if not src:`
			`continue`
			`format_id_list = []`
			`for key in ('delivery', 'quality', 'quality_string'):`
			`value = fd.get(key)`
			`if value:`
			`format_id_list.append(value)`
			`format_id = '-'.join(format_id_list)`
[orf:tvthek] Fix extraction (closes #17737) use _extract_m3u8_formats and _extract_f4m_formats helper functions closes #17737 2018-10-30 23:44:50 +01:00			`if determine_ext(fd['src']) == 'm3u8':`
			`formats.extend(self._extract_m3u8_formats(`
			`fd['src'], video_id, 'mp4', m3u8_id=format_id))`
			`elif determine_ext(fd['src']) == 'f4m':`
			`formats.extend(self._extract_f4m_formats(`
			`fd['src'], video_id, f4m_id=format_id))`
[orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) 2018-11-02 17:46:56 +01:00			`else:`
			`formats.append({`
			`'format_id': format_id,`
			`'url': src,`
			`'protocol': fd.get('protocol'),`
			`})`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00
			`# Check for geoblocking.`
			`# There is a property is_geoprotection, but that's always false`
			`geo_str = sd.get('geoprotection_string')`
			`if geo_str:`
			`try:`
			`http_url = next(`
			`f['url']`
			`for f in formats`
			`if re.match(r'^https?://.*\.mp4$', f['url']))`
			`except StopIteration:`
			`pass`
			`else:`
			`req = HEADRequest(http_url)`
[orf] Remove unused variable name 2014-01-07 05:51:46 +01:00			`self._request_webpage(`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`req, video_id,`
			`note='Testing for geoblocking',`
			`errnote=((`
			`'This video seems to be blocked outside of %s. '`
			`'You may want to try the streaming-* formats.')`
			`% geo_str),`
			`fatal=False)`

[orf:tvthek] Check formats (Closes #8580) 2016-02-16 17:23:38 +01:00			`self._check_formats(formats, video_id)`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`self._sort_formats(formats)`

[orf] add subtitles support(closes #10939) 2016-10-19 12:34:15 +02:00			`subtitles = {}`
			`for sub in sd.get('subtitles', []):`
			`sub_src = sub.get('src')`
			`if not sub_src:`
			`continue`
			`subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({`
			`'url': sub_src,`
			`})`

[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`upload_date = unified_strdate(sd.get('created_date'))`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`entries.append({`
Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value 2013-08-29 19:16:07 +02:00			`'_type': 'video',`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`'id': video_id,`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`'title': title,`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`'formats': formats,`
[orf] add subtitles support(closes #10939) 2016-10-19 12:34:15 +02:00			`'subtitles': subtitles,`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`'description': sd.get('description'),`
[orf:tvthek] Fix extraction and modernize (closes #10898) 2016-10-14 18:43:09 +02:00			`'duration': int_or_none(sd.get('duration_in_seconds')),`
[orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00			`'upload_date': upload_date,`
			`'thumbnail': sd.get('image_full_url'),`
			`})`

			`return {`
			`'_type': 'playlist',`
			`'entries': entries,`
			`'id': playlist_id,`
			`}`
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00

[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`class ORFRadioIE(InfoExtractor):`
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`station = mobj.group('station')`
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`show_date = mobj.group('date')`
			`show_id = mobj.group('show')`

[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`if station == 'fm4':`
			`show_id = '4%s' % show_id`

[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`data = self._download_json(`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),`
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`show_id`
			`)`

			`def extract_entry_dict(info, title, subtitle):`
			`return {`
			`'id': info['loopStreamId'].replace('.mp3', ''),`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),`
[orf] Move all ORF extractors in one place 2014-08-09 20:21:16 +02:00			`'title': title,`
			`'description': subtitle,`
			`'duration': (info['end'] - info['start']) / 1000,`
			`'timestamp': info['start'] / 1000,`
			`'ext': 'mp3'`
			`}`

			`entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]`

			`return {`
			`'_type': 'playlist',`
			`'id': show_id,`
			`'title': data['title'],`
			`'description': data['subtitle'],`
			`'entries': entries`
PEP8 applied 2014-11-23 20:41:03 +01:00			`}`
[orf:iptv] Add extractor (Closes #5140) 2015-03-07 12:31:03 +01:00

[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00			`class ORFFM4IE(ORFRadioIE):`
			`IE_NAME = 'orf:fm4'`
			`IE_DESC = 'radio FM4'`
[orf:radio] Cleanup _VALID_URLs (closes #11643) 2017-05-13 23:30:29 +02:00			`_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00
[orf:radio] Cleanup _VALID_URLs (closes #11643) 2017-05-13 23:30:29 +02:00			`_TEST = {`
			`'url': 'http://fm4.orf.at/player/20170107/CC',`
			`'md5': '2b0be47375432a7ef104453432a19212',`
			`'info_dict': {`
			`'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',`
			`'ext': 'mp3',`
			`'title': 'Solid Steel Radioshow',`
			`'description': 'Die Mixshow von Coldcut und Ninja Tune.',`
			`'duration': 3599,`
			`'timestamp': 1483819257,`
			`'upload_date': '20170107',`
			`},`
			`'skip': 'Shows from ORF radios are only available for 7 days.'`
			`}`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00

			`class ORFOE1IE(ORFRadioIE):`
			`IE_NAME = 'orf:oe1'`
			`IE_DESC = 'Radio Österreich 1'`
[orf:radio] Cleanup _VALID_URLs (closes #11643) 2017-05-13 23:30:29 +02:00			`_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00
[orf:radio] Cleanup _VALID_URLs (closes #11643) 2017-05-13 23:30:29 +02:00			`_TEST = {`
			`'url': 'http://oe1.orf.at/player/20170108/456544',`
			`'md5': '34d8a6e67ea888293741c86a099b745b',`
			`'info_dict': {`
			`'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',`
			`'ext': 'mp3',`
			`'title': 'Morgenjournal',`
			`'duration': 609,`
			`'timestamp': 1483858796,`
			`'upload_date': '20170108',`
			`},`
			`'skip': 'Shows from ORF radios are only available for 7 days.'`
			`}`
[orf:radio] Fix extraction Since oe1.orf.at has been updated, both ORF radios supported by youtube_dl use the same API. This commit honors this fact by merging both extractors into one. 2017-01-08 15:49:54 +01:00

class ORFIPTVIE(InfoExtractor):
    """Extractor for single-video stories on iptv.orf.at."""

    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded in a story page.

        The story page is fetched under its canonical (non-fragment) URL,
        the first ``data-video``/``data-videoid`` attribute supplies the
        video id, and that id is used to query the bits.orf.at metadata
        JSON. The ``loadBalancerUrl`` found there is then resolved (it
        answers with JSONP) into the actual format URLs.
        """
        story_id = self._match_id(url)

        # _VALID_URL also accepts the '#/stories/<id>' form, so the page is
        # always requested through the plain '/stories/<id>' address.
        webpage = self._download_webpage(
            'http://iptv.orf.at/stories/%s' % story_id, story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        # The metadata endpoint answers with a list; only its first item
        # is used.
        data = self._download_json(
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
            video_id)[0]

        # Duration is optional metadata: a missing key must not abort the
        # extraction. The value is divided by 1000 (presumably it is given
        # in milliseconds - the test expects 68.197).
        duration = float_or_none(data.get('duration'), 1000)

        video = data['sources']['default']
        load_balancer_url = video['loadBalancerUrl']
        thumbnail = video.get('preview')

        # Technical properties reported for the source; they are attached
        # to the directly playable (rtmp) format below.
        base_format = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }

        rendition = self._download_json(
            load_balancer_url, video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                rtmp_format = base_format.copy()
                rtmp_format.update({
                    'url': format_url,
                    'format_id': format_id,
                })
                formats.append(rtmp_format)
                continue
            # Manifest URLs are expanded into their individual formats;
            # anything else is ignored.
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }
[orf] Add new extractor for fm4 stories 2017-09-14 20:37:46 +02:00

			`class ORFFM4StoryIE(InfoExtractor):`
			`IE_NAME = 'orf:fm4:story'`
			`IE_DESC = 'fm4.orf.at stories'`
			`_VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'`

			`_TEST = {`
			`'url': 'http://fm4.orf.at/stories/2865738/',`
			`'playlist': [{`
			`'md5': 'e1c2c706c45c7b34cf478bbf409907ca',`
			`'info_dict': {`
			`'id': '547792',`
			`'ext': 'flv',`
			`'title': 'Manu Delago und Inner Tongue live',`
			`'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',`
			`'duration': 1748.52,`
			`'thumbnail': r're:^https?://.*\.jpg$',`
			`'upload_date': '20170913',`
			`},`
			`}, {`
			`'md5': 'c6dd2179731f86f4f55a7b49899d515f',`
			`'info_dict': {`
			`'id': '547798',`
			`'ext': 'flv',`
			`'title': 'Manu Delago und Inner Tongue live (2)',`
			`'duration': 1504.08,`
			`'thumbnail': r're:^https?://.*\.jpg$',`
			`'upload_date': '20170913',`
			`'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',`
			`},`
			`}],`
			`}`

			`def _real_extract(self, url):`
			`story_id = self._match_id(url)`
			`webpage = self._download_webpage(url, story_id)`

			`entries = []`
			`all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))`
			`for idx, video_id in enumerate(all_ids):`
			`data = self._download_json(`
			`'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,`
			`video_id)[0]`

			`duration = float_or_none(data['duration'], 1000)`

			`video = data['sources']['q8c']`
			`load_balancer_url = video['loadBalancerUrl']`
			`abr = int_or_none(video.get('audioBitrate'))`
			`vbr = int_or_none(video.get('bitrate'))`
			`fps = int_or_none(video.get('videoFps'))`
			`width = int_or_none(video.get('videoWidth'))`
			`height = int_or_none(video.get('videoHeight'))`
			`thumbnail = video.get('preview')`

			`rendition = self._download_json(`
			`load_balancer_url, video_id, transform_source=strip_jsonp)`

			`f = {`
			`'abr': abr,`
			`'vbr': vbr,`
			`'fps': fps,`
			`'width': width,`
			`'height': height,`
			`}`

			`formats = []`
			`for format_id, format_url in rendition['redirect'].items():`
			`if format_id == 'rtmp':`
			`ff = f.copy()`
			`ff.update({`
			`'url': format_url,`
			`'format_id': format_id,`
			`})`
			`formats.append(ff)`
			`elif determine_ext(format_url) == 'f4m':`
			`formats.extend(self._extract_f4m_formats(`
			`format_url, video_id, f4m_id=format_id))`
			`elif determine_ext(format_url) == 'm3u8':`
			`formats.extend(self._extract_m3u8_formats(`
			`format_url, video_id, 'mp4', m3u8_id=format_id))`
			`else:`
			`continue`
			`self._sort_formats(formats)`

			`title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')`
			`if idx >= 1:`
			`# Titles are duplicates, make them unique`
			`title += ' (' + str(idx + 1) + ')'`
			`description = self._og_search_description(webpage)`
			`upload_date = unified_strdate(self._html_search_meta(`
			`'dc.date', webpage, 'upload date'))`

			`entries.append({`
			`'id': video_id,`
			`'title': title,`
			`'description': description,`
			`'duration': duration,`
			`'thumbnail': thumbnail,`
			`'upload_date': upload_date,`
			`'formats': formats,`
			`})`

			`return self.playlist_result(entries)`