Merge 406dab4edf into 48c5663c5f

2020-10-22 11:56:26 -04:00 · 2020-10-22 11:56:26 -04:00 · 57735b4016
parent 48c5663c5f 406dab4edf
commit 57735b4016
3 changed files with 151 additions and 0 deletions
--- a/youtube_dl/extractor/dropout.py
+++ b/youtube_dl/extractor/dropout.py
@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .vimeo import VHXEmbedIE
+
+from ..utils import ExtractorError
+
+import re
+
+
+class DropoutIE(VHXEmbedIE):
+    """Extractor for single video pages on dropout.tv.
+
+    The video page is fetched with an authenticated session, the embedded
+    VHX player iframe is located and its URL is delegated to VHXEmbedIE.
+    """
+
+    IE_NAME = 'dropout'
+    IE_DESC = 'Dropout.tv'
+    _NETRC_MACHINE = 'dropouttv'
+    _LOGIN_URL = 'https://www.dropout.tv/login'
+    _LOGOUT_URL = 'https://www.dropout.tv/logout'
+    # The season component may have several characters (e.g. season:10) and
+    # the id must stop at a path separator, query string or fragment.
+    _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/(?:season:[^/]+/)?)?videos/(?P<id>[^/?#]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.dropout.tv/dimension-20-tiny-heist/season:1/videos/big-little-crimes',
+            'md5': '46edf4c6d632e2771a42a235f920b8f7',
+            'info_dict': {
+                'id': '382486557',
+                'ext': 'mp4',
+                'uploader': 'OTT Videos',
+                'uploader_id': 'user80538407',
+                'title': "Untitled",
+                'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$',
+            }
+        },
+        {
+            'url': 'https://www.dropout.tv/videos/um-actually-behind-the-scenes',
+            'md5': '7fd342c652a86b996bae2920695593af',
+            'info_dict': {
+                'id': '265656116',
+                'ext': 'mp4',
+                'uploader': 'OTT Videos',
+                'uploader_id': 'user80538407',
+                'title': 'Um, Actually: Behind the Scenes',
+                'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$',
+            }
+        }
+    ]
+
+    def _real_initialize(self):
+        """Authenticate before any extraction is attempted."""
+        self._login()
+
+    def _login(self):
+        """Log in with the configured credentials or rely on a cookie file.
+
+        Raises ExtractorError when neither a complete email/password pair
+        nor a cookie file has been supplied.
+        """
+        email, password = self._get_login_info()
+        has_credentials = email is not None and password is not None
+        if not has_credentials and self._downloader.params.get('cookiefile') is None:
+            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+        self._vhx_login(email, password, self._LOGIN_URL)
+
+    def _real_extract(self, url):
+        """Resolve a dropout.tv video page to its embedded VHX player URL.
+
+        Raises ExtractorError when the page reports that the device limit
+        was reached or when it looks like a logged-out page.
+        """
+        # Use the URL slug so that log messages do not show "None"
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        if "The device limit for your account has been reached" in webpage:
+            raise ExtractorError('Device Limit reached', expected=True)
+        if "Start your free trial" in webpage or "Start Free Trial" in webpage or "Sign in" in webpage:
+            raise ExtractorError('You don\'t seem to be logged in', expected=True)
+
+        video = self._html_search_regex(
+            r'<iframe[^>]*"(?P<embed>https://embed\.vhx\.tv/videos/[0-9]+[^"]*)"[^>]*>',
+            webpage, 'embed', group='embed')
+        video_id = self._search_regex(
+            r'https://embed\.vhx\.tv/videos/(?P<id>[0-9]+)', video, 'id', group='id')
+        # The title is optional: the embed extractor can still run without it
+        video_title = self._html_search_regex(
+            r'<h1 class="[^"]*video-title[^"]*"[^>]*>\s*<strong>(?P<title>[^<]+)<',
+            webpage, 'title', fatal=False)
+        # Name the target extractor explicitly instead of probing all of them
+        return self.url_result(
+            video, ie=VHXEmbedIE.ie_key(), video_id=video_id, video_title=video_title)
+
+
+class DropoutPlaylistIE(DropoutIE):
+    """Extractor for dropout.tv collection pages (series, seasons, listings).
+
+    Every URL under www.dropout.tv that is not a single video page is
+    treated as a playlist whose entries are the linked browse items.
+    """
+
+    IE_NAME = 'dropout:playlist'
+    _VALID_URL = r'https://www\.dropout\.tv/(?P<id>.+)'
+    _TESTS = [
+        {
+            'url': 'https://www.dropout.tv/um-actually',
+            'playlist_count': 33,
+            'info_dict': {
+                'id': 'um-actually',
+                'title': 'Um, Actually',
+            }
+        },
+        {
+            'url': 'https://www.dropout.tv/new-releases',
+            'playlist_count': 15,
+            'info_dict': {
+                'id': 'new-releases',
+                'title': 'New Releases',
+            }
+        },
+        {
+            'url': 'https://www.dropout.tv/troopers-the-web-series/season:2',
+            'playlist_count': 10,
+            'info_dict': {
+                'id': 'troopers-the-web-series/season:2',
+                'title': 'Troopers: The Web Series',
+            }
+        }
+    ]
+
+    @classmethod
+    def suitable(cls, url):
+        """Match any dropout.tv URL that DropoutIE does not claim."""
+        return False if DropoutIE.suitable(url) else super(DropoutPlaylistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Collect the item links of a collection page and of its follow-up pages."""
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        playlist_title = self._html_search_regex(
+            r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<',
+            webpage, 'title')
+
+        items = []
+        # Pages already requested; prevents an endless loop when the first
+        # "page=" link of a page points back to one that was already visited
+        visited_pages = set()
+        while True:
+            for item_url in re.findall(
+                    r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://www\.dropout\.tv/[^/]+/[^"]+)"',
+                    webpage):
+                # Keep the page order but never list the same item twice
+                if item_url not in items:
+                    items.append(item_url)
+            # _html_search_regex decodes entities such as "&amp;" in the href
+            next_page_url = self._html_search_regex(
+                r'href="([^"]+\?[^"]*(?:&|&amp;)?page=\d+)"',
+                webpage, 'next page url', default=None)
+            if not next_page_url:
+                break
+            # Only site-relative links need the host prepended
+            if not next_page_url.startswith('http'):
+                next_page_url = 'https://www.dropout.tv' + next_page_url
+            if next_page_url in visited_pages:
+                break
+            visited_pages.add(next_page_url)
+            webpage = self._download_webpage(next_page_url, playlist_id)
+
+        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -294,6 +294,10 @@ from .discoveryvr import DiscoveryVRIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
 from .dropbox import DropboxIE
+from .dropout import (
+    DropoutIE,
+    DropoutPlaylistIE,
+)
 from .dw import (
    DWIE,
    DWArticleIE,
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -1116,6 +1116,36 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
    IE_NAME = 'vhx:embed'
    _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'

+    def _vhx_login(self, email, password, login_url):
+        """Sign in to a VHX-powered site through its HTML login form.
+
+        email, password: credentials to submit; either may be None when the
+            caller relies on cookies only.
+        login_url: URL of the login page; the filled-in form is posted back
+            to the same URL.
+
+        Returns None. Nothing is submitted when the login page already
+        reports a signed-in session or when no credentials were given.
+        """
+        login_page = self._download_webpage(
+            login_url, None,
+            note='Downloading login page',
+            errnote='unable to fetch login page'
+        )
+
+        # The user is already logged in, e.g. via cookies
+        if "You are now signed in." in login_page:
+            return
+
+        # Without credentials there is nothing meaningful to submit; the
+        # form would otherwise be posted with None as email and password
+        if email is None or password is None:
+            return
+
+        # Start from the hidden form fields found on the login page
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'passwordless': 0,
+            'email': email,
+            'password': password
+        })
+
+        # A 302 answer to the form submission is accepted as a valid response
+        self._download_webpage(login_url, None, 'Logging in', 'Login failed',
+                               expected_status=302,
+                               data=urlencode_postdata(login_form),
+                               headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+    def _call_api(self, video_id, access_token, path='', query=None):
+        """Download JSON for a video from api.vhx.tv using a bearer token.
+
+        path is appended verbatim to the video endpoint and query is passed
+        through to the JSON download helper.
+        """
+        endpoint = 'https://api.vhx.tv/videos/' + video_id + path
+        auth_headers = {'Authorization': 'Bearer ' + access_token}
+        return self._download_json(
+            endpoint, video_id, headers=auth_headers, query=query)
+
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)