From 060b58662011ea9c821d5287ff163dc25aa18188 Mon Sep 17 00:00:00 2001
From: user-c89703cc76e2ccd2
Date: Thu, 19 Mar 2020 17:22:43 -0600
Subject: [PATCH] [corepoweryogaondemand] Add new extractor

---
 youtube_dl/extractor/corepoweryogaondemand.py | 79 +++++++++++++++++++
 youtube_dl/extractor/extractors.py            |  1 +
 2 files changed, 80 insertions(+)
 create mode 100644 youtube_dl/extractor/corepoweryogaondemand.py

diff --git a/youtube_dl/extractor/corepoweryogaondemand.py b/youtube_dl/extractor/corepoweryogaondemand.py
new file mode 100644
index 000000000..ea3211f21
--- /dev/null
+++ b/youtube_dl/extractor/corepoweryogaondemand.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+)
+
+
+class CorePowerYogaOnDemandIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?corepoweryogaondemand\.com/keep-up-your-practice/videos/(?P<id>[-a-zA-Z0-9]+)'
+    _TEST = {
+        'url': 'https://www.corepoweryogaondemand.com/keep-up-your-practice/videos/trust-the-unknown',
+        'md5': 'b29716154060ddbf2defb7cd9a11492e',
+        'info_dict': {
+            'id': 'trust-the-unknown',
+            'ext': 'mp4',
+            'title': 'Trust the Unknown',
+            'description': 'Trust in the infinite unknown and let your breath guide you.'
+        }
+    }
+
+    def _extract_format(self, raw_format):
+        """Build a format dict from one entry of the "progressive" list."""
+        quality = raw_format.get('quality')
+        return {
+            'url': raw_format['url'],
+            'format_id': 'http-%s' % quality if quality else None,
+            'height': int_or_none(raw_format.get('height')),
+            'width': int_or_none(raw_format.get('width')),
+            'fps': int_or_none(raw_format.get('fps')),
+            'resolution': quality,
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # "title" is a mandatory field, so fall back to the OpenGraph title
+        # when <title> does not have the expected "A - B - C" shape.
+        title = self._html_search_regex(
+            r'<title>([^<]+) - [^<]+ - [^<]+</title>', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
+
+        # The player is embedded through an iframe served by embed.vhx.tv.
+        embed_path = self._search_regex(
+            r'<iframe[^>]+src="https://embed\.vhx\.tv/(?P<url>[^"]+)"',
+            webpage, 'embedded video iframe', group='url')
+        iframe = self._download_webpage(
+            'https://embed.vhx.tv/' + embed_path, video_id,
+            note='Downloading embedded video page')
+
+        # The embed page assigns its metadata, including the player config
+        # URL, to window.OTTData as a JSON object.
+        video_metadata = self._parse_json(
+            self._search_regex(
+                r'window\.OTTData\s*=\s*(?P<json>{.+})', iframe,
+                'video metadata', group='json'),
+            video_id)
+
+        video_config = self._download_json(
+            video_metadata['config_url'], video_id,
+            note='Downloading media information')
+        raw_formats = try_get(
+            video_config, lambda x: x['request']['files']['progressive'],
+            list) or []
+        formats = [
+            self._extract_format(f) for f in raw_formats
+            if isinstance(f, dict) and f.get('url')]
+        # Sorts worst to best and reports an error if no format was found.
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': self._og_search_description(webpage, default=None),
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 64d1fa251..42090db64 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -230,6 +230,7 @@ from .commonprotocols import (
 )
 from .condenast import CondeNastIE
 from .contv import CONtvIE
+from .corepoweryogaondemand import CorePowerYogaOnDemandIE
 from .corus import CorusIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE