[corepoweryogaondemand] Add new extractor

2025-02-18 18:17:55 +01:00 · 2020-03-19 17:22:43 -06:00 · 2020-03-19 17:22:43 -06:00 · 060b586620
commit 060b586620
parent 158bc5ac03
2 changed files with 58 additions and 0 deletions
--- a/youtube_dl/extractor/corepoweryogaondemand.py
+++ b/youtube_dl/extractor/corepoweryogaondemand.py
@ -0,0 +1,57 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
 )
 class CorePowerYogaOnDemandIE(InfoExtractor):
    """Extractor for videos on corepoweryogaondemand.com.

    The watch page embeds a player iframe served from embed.vhx.tv. That
    player page assigns a JSON object to ``window.OTTData``; its
    ``config_url`` entry is fetched as JSON, and the list found under
    ``request.files.progressive`` supplies the downloadable formats.
    """

    _VALID_URL = r'https?://(?:www\.)?corepoweryogaondemand\.com/keep-up-your-practice/videos/(?P<id>[-a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://www.corepoweryogaondemand.com/keep-up-your-practice/videos/trust-the-unknown',
        'md5': 'b29716154060ddbf2defb7cd9a11492e',
        'info_dict': {
            'id': 'trust-the-unknown',
            'ext': 'mp4',
            'title': 'Trust the Unknown',
            'description': 'Trust in the infinite unknown and let your breath guide you.'
        }
    }

    def _extract_format(self, raw_format):
        """Convert one ``progressive`` entry into a format dict.

        ``raw_format['url']`` is required and raises KeyError when absent.
        ``height`` and ``width`` are coerced with int_or_none, so a missing
        or non-numeric value becomes None instead of reaching the sorter.
        The ``quality`` label is exposed unchanged as ``resolution``.
        """
        return {
            'url': raw_format['url'],
            'height': int_or_none(raw_format.get('height')),
            'width': int_or_none(raw_format.get('width')),
            'fps': raw_format.get('fps'),
            'resolution': raw_format.get('quality'),
        }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, description) for url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The <title> pattern depends on the page layout ("A - B - C").
        # title is a mandatory field, so fall back to og:title, which is
        # fatal when missing, instead of silently returning None.
        title = self._html_search_regex(
            r'<title>([^<]+) - [^<]+ - [^<]+</title>', webpage, 'title',
            default=None) or self._og_search_title(webpage)

        # Locate the embedded player iframe and fetch it. The second
        # positional argument of _download_webpage is the video id; the
        # human-readable message belongs in ``note``.
        iframe_path = self._search_regex(
            r'<iframe[^>]+src=\"https://embed\.vhx\.tv/(?P<url>[^\"]+)\"',
            webpage, 'embedded video iframe', group='url')
        iframe = self._download_webpage(
            'https://embed.vhx.tv/' + iframe_path, video_id,
            note='Downloading embedded player page')

        # The player page carries its metadata as a JSON literal assigned
        # to window.OTTData; config_url inside it leads to the media list.
        ott_data = self._search_regex(
            r'window\.OTTData\s*=\s*(?P<json>{.+})', iframe,
            'video metadata', group='json')
        video_metadata = self._parse_json(ott_data, video_id)

        # Fetch media from the config URL.
        video_config = self._download_json(
            video_metadata['config_url'], video_id,
            note='Downloading media information')

        formats = [
            self._extract_format(raw_format)
            for raw_format in video_config['request']['files']['progressive']
        ]
        # Use the shared sorter rather than parsing the "quality" label:
        # slicing a missing label or ordering None against int raises
        # TypeError on Python 3.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage, default=None),
        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -230,6 +230,7 @@ from .commonprotocols import (
 )
 from .condenast import CondeNastIE
 from .contv import CONtvIE
 from .corepoweryogaondemand import CorePowerYogaOnDemandIE
 from .corus import CorusIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE