1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-24 01:14:32 +01:00
youtube-dl/youtube_dl/extractor/intldropout.py

138 lines
5.3 KiB
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
from .vimeo import VHXEmbedIE
from ..utils import (
ExtractorError,
2019-02-20 18:58:00 +01:00
urlencode_postdata,
)
import re
class IntlDropoutIE(VHXEmbedIE):
    """Extractor for single video pages on intl.dropout.tv.

    Logs in during initialization, then looks for a VHX embed iframe on the
    video page and returns the embed URL as a URL result.
    """

    IE_NAME = 'intldropout'
    IE_DESC = 'International Dropout.tv'
    # Presumably the key used by the inherited _get_login_info() to look up
    # credentials; that helper is defined outside this file.
    _NETRC_MACHINE = 'intldropouttv'
    _LOGIN_URL = 'https://intl.dropout.tv/login'
    # NOTE(review): not referenced by the code visible in this file.
    _LOGOUT_URL = 'https://intl.dropout.tv/logout'
    # Matches both ".../<show>/season:<n>/videos/<slug>" and ".../videos/<slug>".
    _VALID_URL = r'https://intl\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)'
    _TESTS = [
        {
            'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic',
            'md5': '8beaac579b6ba762f63cd452fd28dcce',
            'info_dict': {
                'id': '397785',
                'ext': 'mp4',
                'title': "C-3PO's Origins, HP Lovecraft, the Food Album (with Weird Al Yankovic)",
                'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$',
                'description': 'Caldwell Tanner, Siobhan Thompson, and Nate Dern inspect guns and review the Diagon Alley bar scene.',
                'upload_date': '20181206',
                'timestamp': 1544117975,
            }
        },
        {
            'url': 'https://intl.dropout.tv/videos/um-actually-behind-the-scenes',
            'md5': 'b974927cd563423fe50945dbfdbb894c',
            'info_dict': {
                'id': '397943',
                'ext': 'mp4',
                'title': 'Um, Actually: Behind the Scenes',
                'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$',
                'description': 'What does it take to stump the nerdy? Mike Trapp and team pull back the curtain.',
                'upload_date': '20181206',
                'timestamp': 1544118409,
            }
        }
    ]

    def _real_initialize(self):
        """Perform the login step before any extraction."""
        self._login()

    def _login(self):
        """Sign in to intl.dropout.tv with the configured credentials.

        Returns True when no credentials are configured but a cookie file is
        (login is skipped); returns None in every other non-raising case.

        Raises ExtractorError (marked as expected) when neither credentials
        nor a cookie file are available.
        """
        email, password = self._get_login_info()
        if email is None or password is None:
            # Without credentials the only remaining option is a cookie file.
            if self._downloader.params.get('cookiefile') is None:
                raise ExtractorError(
                    'No login info available, needed for using %s.' % self.IE_NAME,
                    expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        # The request is non-fatal; a False result is treated as "could not
        # fetch the page" and the login attempt is abandoned silently.
        if login_page is False:
            return

        # The page already reports an active session: nothing left to do.
        if 'You are now signed in.' in login_page:
            return

        # Start from the form's hidden inputs and add the credential fields.
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'passwordless': 0,
            'email': email,
            'password': password,
        })

        # A 302 response is declared as an expected status for this POST.
        self._download_webpage(
            self._LOGIN_URL, None, 'Logging in', 'Login failed',
            expected_status=302,
            data=urlencode_postdata(login_form),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

    def _real_extract(self, url):
        """Locate the VHX embed on the video page and return it as a URL result.

        Raises ExtractorError (marked as expected) when the page reports that
        the account's device limit has been reached.
        """
        # Use the URL slug as the id for the page download, like the playlist
        # extractor does, instead of passing None.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if 'The device limit for your account has been reached' in webpage:
            raise ExtractorError('Device Limit reached', expected=True)

        # Dots in the host name are escaped so they only match a literal '.'.
        embed_url = self._html_search_regex(
            r'<iframe[^>]*"(?P<embed>https://embed\.vhx\.tv/videos/[0-9]+[^"]*)"[^>]*>',
            webpage, 'embed')
        video_id = self._search_regex(
            r'https://embed\.vhx\.tv/videos/(?P<id>[0-9]+)', embed_url, 'id')
        # The title is optional: a missing heading must not abort extraction.
        video_title = self._html_search_regex(
            r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<',
            webpage, 'title', fatal=False)

        return self.url_result(embed_url, video_id=video_id, video_title=video_title)
2019-02-22 18:15:02 +01:00
class IntlDropoutPlaylistIE(IntlDropoutIE):
    """Extractor for collection pages (series, seasons, listings) on intl.dropout.tv.

    Collects the item links found on the page and returns them as a playlist.
    Login handling is inherited from IntlDropoutIE.
    """

    IE_NAME = 'intldropout:playlist'
    # Deliberately broad; suitable() below excludes single-video URLs.
    _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)'
    # NOTE(review): every entry carries the same 'md5' value, which looks
    # copy-pasted; confirm whether the test harness uses it for playlists.
    _TESTS = [
        {
            'url': 'https://intl.dropout.tv/um-actually-the-web-series',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 9,
            'info_dict': {
                'id': 'um-actually-the-web-series',
                'title': 'Um, Actually: The Web Series',
            }
        },
        {
            'url': 'https://intl.dropout.tv/new-releases',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 22,
            'info_dict': {
                'id': 'new-releases',
                'title': 'New Releases',
            }
        },
        {
            'url': 'https://intl.dropout.tv/troopers/season:2',
            'md5': 'ebcd26ef54f546225e7cb96e79da31cc',
            'playlist_count': 10,
            'info_dict': {
                'id': 'troopers/season:2',
                'title': 'Troopers',
            }
        }
    ]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs accepted by IntlDropoutIE are always rejected here, so the two
        extractors never both claim the same URL.
        """
        if IntlDropoutIE.suitable(url):
            return False
        return super(IntlDropoutPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist from the item links on a collection page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The pattern has a single group, so findall() yields the captured
        # URLs directly. Dots in the host name are escaped so they only match
        # a literal '.'.
        items = re.findall(
            r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl\.dropout\.tv/[^/]+/[^"]+)"',
            webpage)
        playlist_title = self._html_search_regex(
            r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<',
            webpage, 'title')

        return self.playlist_from_matches(
            items, playlist_id=playlist_id, playlist_title=playlist_title)