Merge 406dab4edf into 48c5663c5f

2020-10-22 11:56:26 -04:00 · 2020-10-22 11:56:26 -04:00 · 57735b4016
parent 48c5663c5f 406dab4edf
commit 57735b4016
3 changed files with 151 additions and 0 deletions
--- a/youtube_dl/extractor/dropout.py
+++ b/youtube_dl/extractor/dropout.py
@ -0,0 +1,117 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .vimeo import VHXEmbedIE
 from ..utils import ExtractorError
 import re
 class DropoutIE(VHXEmbedIE):
    """Extractor for a single Dropout.tv video page.

    The video page embeds a VHX player in an iframe. This extractor logs in
    during initialization, finds that iframe and returns its URL through
    ``url_result``. Credentials or a cookie file are required.
    """

    IE_NAME = 'dropout'
    IE_DESC = 'Dropout.tv'
    _NETRC_MACHINE = 'dropouttv'
    _LOGIN_URL = 'https://www.dropout.tv/login'
    _LOGOUT_URL = 'https://www.dropout.tv/logout'
    # The season component is matched with "+" so that multi-character
    # seasons (e.g. "season:10") are accepted; the id stops at the first
    # "/", "?" or "#" so that it is only the video slug.
    _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/(?:season:[^/]+/)?)?videos/(?P<id>[^/?#]+)'
    _TESTS = [
        {
            'url': 'https://www.dropout.tv/dimension-20-tiny-heist/season:1/videos/big-little-crimes',
            'md5': '46edf4c6d632e2771a42a235f920b8f7',
            'info_dict': {
                'id': '382486557',
                'ext': 'mp4',
                'uploader': 'OTT Videos',
                'uploader_id': 'user80538407',
                'title': "Untitled",
                'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$',
            }
        },
        {
            'url': 'https://www.dropout.tv/videos/um-actually-behind-the-scenes',
            'md5': '7fd342c652a86b996bae2920695593af',
            'info_dict': {
                'id': '265656116',
                'ext': 'mp4',
                'uploader': 'OTT Videos',
                'uploader_id': 'user80538407',
                'title': 'Um, Actually: Behind the Scenes',
                'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$',
            }
        }
    ]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured credentials.

        Raises ExtractorError when neither complete credentials nor a cookie
        file are available. With only a cookie file, ``_vhx_login`` is still
        called (with ``None`` credentials).
        """
        email, password = self._get_login_info()
        no_credentials = email is None or password is None
        if no_credentials and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        self._vhx_login(email, password, self._LOGIN_URL)

    def _real_extract(self, url):
        """Resolve a Dropout video page to its embedded VHX player URL.

        Raises ExtractorError when the page reports a device limit or shows
        markers of a logged-out session.
        """
        # Use the URL slug as the id so download messages name the video.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        if "The device limit for your account has been reached" in webpage:
            raise ExtractorError('Device Limit reached', expected=True)
        if "Start your free trial" in webpage or "Start Free Trial" in webpage or "Sign in" in webpage:
            raise ExtractorError('You don\'t seem to be logged in', expected=True)
        # Dots are escaped so only the literal embed.vhx.tv host matches.
        video = self._html_search_regex(
            r'<iframe[^>]*"(?P<embed>https://embed\.vhx\.tv/videos/[0-9]+[^"]*)"[^>]*>',
            webpage, 'embed')
        video_id = self._search_regex(
            r'https://embed\.vhx\.tv/videos/(?P<id>[0-9]+)', video, 'id')
        # The title is optional: a missing heading must not abort extraction.
        video_title = self._html_search_regex(
            r'<h1 class="[^"]*video-title[^"]*"[^>]*>\s*<strong>(?P<title>[^<]+)<',
            webpage, 'title', fatal=False)
        return self.url_result(video, video_id=video_id, video_title=video_title)
 class DropoutPlaylistIE(DropoutIE):
    """Extractor for Dropout.tv collection pages (shows, seasons, listings).

    Every page of the collection is scanned for item links, and the links
    are returned as a playlist.
    """

    IE_NAME = 'dropout:playlist'
    _VALID_URL = r'https://www\.dropout\.tv/(?P<id>.+)'
    _TESTS = [
        {
            'url': 'https://www.dropout.tv/um-actually',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 33,
            'info_dict': {
                'id': 'um-actually',
                'title': 'Um, Actually',
            }
        },
        {
            'url': 'https://www.dropout.tv/new-releases',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 15,
            'info_dict': {
                'id': 'new-releases',
                'title': 'New Releases',
            }
        },
        {
            'url': 'https://www.dropout.tv/troopers-the-web-series/season:2',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 10,
            'info_dict': {
                'id': 'troopers-the-web-series/season:2',
                'title': 'Troopers: The Web Series',
            }
        }
    ]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that the single-video extractor accepts."""
        # _VALID_URL here matches every dropout.tv URL, so video pages have
        # to be excluded explicitly.
        if DropoutIE.suitable(url):
            return False
        return super(DropoutPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the item links of a collection across all of its pages.

        Pagination only ever moves to a strictly higher page number, so a
        "previous" or "current page" link cannot cause an endless loop or a
        repeated download.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        playlist_title = self._html_search_regex(
            r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<',
            webpage, 'title')

        # Start counting from the page named in the URL, if any.
        start_page = re.search(r'[?&]page=(\d+)', url)
        current_page = int(start_page.group(1)) if start_page else 1

        items = []
        while True:
            items.extend(re.findall(
                r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://www\.dropout\.tv/[^/]+/[^"]+)"',
                webpage))
            # Gather every pagination link and keep only those pointing
            # beyond the page that was just processed.
            following = [
                (int(page_number), href)
                for href, page_number in re.findall(
                    r'href="([^"]+\?[^"]*page=(\d+))"', webpage)
                if int(page_number) > current_page]
            if not following:
                break
            current_page, next_page_url = min(following)
            # The href comes from raw HTML: decode escaped ampersands.
            next_page_url = next_page_url.replace('&amp;', '&')
            # Only site-relative links need the host prepended.
            if not re.match(r'https?://', next_page_url):
                next_page_url = 'https://www.dropout.tv/' + next_page_url.lstrip('/')
            webpage = self._download_webpage(
                next_page_url, playlist_id,
                note='Downloading page %d' % current_page)
        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -294,6 +294,10 @@ from .discoveryvr import DiscoveryVRIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
 from .dropbox import DropboxIE
 from .dropout import (
    DropoutIE,
    DropoutPlaylistIE,
 )
 from .dw import (
    DWIE,
    DWArticleIE,
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -1116,6 +1116,36 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
    IE_NAME = 'vhx:embed'
    _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
    def _vhx_login(self, email, password, login_url):
        """Log in through the HTML login form served at ``login_url``.

        The login page is fetched first. If it already reports a signed-in
        session (for example via cookies), nothing more is done. Otherwise
        the form's hidden inputs are submitted together with ``email`` and
        ``password``.

        When either credential is ``None`` no form is submitted, since a
        literal ``None`` would otherwise be posted as the credential. The
        caller has to detect the logged-out state itself.
        """
        login_page = self._download_webpage(
            login_url, None,
            note='Downloading login page',
            errnote='unable to fetch login page'
        )
        # Check whether the user is already logged in via cookies.
        if "You are now signed in." in login_page:
            return
        # Cookie-only callers pass no credentials; there is nothing to post.
        if email is None or password is None:
            return
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'passwordless': 0,
            'email': email,
            'password': password
        })
        # A 302 reply is declared as an expected status for this request.
        self._download_webpage(login_url, None, 'Logging in', 'Login failed',
                               expected_status=302,
                               data=urlencode_postdata(login_form),
                               headers={'Content-Type': 'application/x-www-form-urlencoded'})
    def _call_api(self, video_id, access_token, path='', query=None):
        """Request JSON for a video from the VHX API with a bearer token.

        ``path`` is appended verbatim to the video's API URL and ``query``
        is handed on to ``_download_json``, whose result is returned as-is.
        """
        endpoint = 'https://api.vhx.tv/videos/' + video_id + path
        auth_headers = {'Authorization': 'Bearer ' + access_token}
        return self._download_json(
            endpoint, video_id, headers=auth_headers, query=query)
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)