diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 12eeab271..bdacd7706 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,21 +6,35 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_kwargs, + compat_urlparse, +) from ..utils import ( unescapeHTML, urlencode_postdata, unified_timestamp, ExtractorError, + NO_DEFAULT, ) MSO_INFO = { + 'Dish': { + 'name': 'Dish', + 'username_field': 'username', + 'password_field': 'password', + }, 'DTV': { 'name': 'DIRECTV', 'username_field': 'username', 'password_field': 'password', }, + 'ATTOTT': { + 'name': 'DIRECTV NOW', + 'username_field': 'email', + 'password_field': 'loginpassword', + }, 'Rogers': { 'name': 'Rogers', 'username_field': 'UserName', @@ -31,6 +45,26 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', }, + 'TWC': { + 'name': 'Time Warner Cable | Spectrum', + 'username_field': 'Ecom_User_ID', + 'password_field': 'Ecom_Password', + }, + 'Brighthouse': { + 'name': 'Bright House Networks | Spectrum', + 'username_field': 'j_username', + 'password_field': 'j_password', + }, + 'Charter_Direct': { + 'name': 'Charter Spectrum', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, + 'Verizon': { + 'name': 'Verizon FiOS', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, 'thr030': { 'name': '3 Rivers Communications' }, @@ -1293,6 +1327,15 @@ class AdobePassIE(InfoExtractor): _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _MVPD_CACHE = 'ap-mvpd' + _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' + + def _download_webpage_handle(self, *args, **kwargs): + headers = kwargs.get('headers', {}) + headers.update(self.geo_verification_headers()) + kwargs['headers'] = headers + return super(AdobePassIE, self)._download_webpage_handle( + *args, 
def process_redirects(page_res, video_id, note, lastbookend=False):
    """Follow interstitial 'Redirecting...' pages until a real page is reached.

    page_res is a (page, urlh) pair as returned by
    self._download_webpage_handle.  While the page text contains
    'Redirecting...', the next request is either:
      * the URL found by extract_redirect_url(page), if any, or
      * the current URL (urlh.geturl()) re-requested with the page's
        hidden form inputs sent as the query string.

    video_id and note are passed through to every download call.
    If lastbookend is true, 'firstbookend' in the current URL is rewritten
    to 'lastbookend' before the form-based re-request.

    Returns the (page, urlh) pair of the first page that no longer
    contains 'Redirecting...'.
    """
    page, urlh = page_res
    while 'Redirecting...' in page:
        # NOTE(review): other call sites in this patch pass
        # url=urlh.geturl() to extract_redirect_url; confirm whether
        # that is needed here as well.
        redirect_url = extract_redirect_url(page)
        if redirect_url:
            page_res = self._download_webpage_handle(
                redirect_url, video_id, note)
        else:
            form_data = self._hidden_inputs(page)
            url = urlh.geturl()
            if lastbookend:
                # str.replace returns a new string; the result must be
                # assigned back or the rewrite is silently lost.
                url = url.replace('firstbookend', 'lastbookend')
            page_res = self._download_webpage_handle(
                url, video_id, note,
                query=form_data)
        page, urlh = page_res
    return page_res
in provider_redirect_page: + saml_redirect_url = self._html_search_regex( + r'self\.parent\.location=(["\'])(?P.+?)\1', + provider_redirect_page, + 'SAML Redirect URL', group='url') + saml_login_page = self._download_webpage( + saml_redirect_url, video_id, + 'Downloading SAML Login Page') + else: + saml_login_page_res = post_form( + provider_redirect_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, + }) + saml_login_page, urlh = saml_login_page_res + if 'Please try again.' in saml_login_page: + raise ExtractorError( + 'We\'re sorry, but either the User ID or Password entered is not correct.') + saml_login_url = self._search_regex( + r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P.+?)\1', + saml_login_page, 'SAML Login URL', group='url') + saml_response_json = self._download_json( + saml_login_url, video_id, 'Downloading SAML Response', + headers={'Content-Type': 'text/xml'}) + self._download_webpage( + saml_response_json['targetValue'], video_id, + 'Confirming Login', data=urlencode_postdata({ + 'SAMLResponse': saml_response_json['SAMLResponse'], + 'RelayState': saml_response_json['RelayState'] + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded' + }) + elif mso_id == 'Dish': + provider_redirect_page, urlh = provider_redirect_page_res + provider_refresh_redirect_url = extract_redirect_url( + provider_redirect_page, url=urlh.geturl()) + if provider_refresh_redirect_url: + provider_redirect_page_res = self._download_webpage_handle( + provider_refresh_redirect_url, video_id, + 'Downloading Provider Redirect Page (meta refresh)') + provider_redirect_page, urlh = provider_redirect_page_res + if '' in provider_redirect_page: + provider_redirect_page_res = post_form( + provider_redirect_page_res, 'Downloading login page (redirect)') + provider_redirect_page_res = process_redirects( + provider_redirect_page_res, video_id, 'Downloading login page (redirect)', True) + provider_redirect_page, urlh = 
provider_redirect_page_res + provider_redirect_page_res = self._download_webpage_handle( + urlh.geturl(), video_id, self._DOWNLOADING_LOGIN_PAGE) + provider_redirect_page, urlh = provider_redirect_page_res + mvpd_confirm_page_res = post_form(provider_redirect_page_res, 'Attempting social login', { + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, + 'login_type': 'username,password', + 'source': 'authsynacor_identity1.dishnetwork.com', + 'source_button': 'authsynacor_identity1.dishnetwork.com', + 'remember_me': 'no' + }) + mvpd_confirm_page, urlh = mvpd_confirm_page_res + finish_url = urlh.geturl() + finish_url = finish_url.replace('/login','/finish') + mvpd_confirm_page_res = self._download_webpage_handle( + finish_url, video_id, 'Completing social login') + mvpd_confirm_page, urlh = mvpd_confirm_page_res + mvpd_confirm_page_res = post_form(mvpd_confirm_page_res, 'Logging in', { + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, + 'login_type': 'username,password', + 'source': 'authsynacor_identity1.dishnetwork.com', + 'source_button': 'authsynacor_identity1.dishnetwork.com', + 'remember_me': 'no' + }) + mvpd_confirm_page_res = process_redirects( + mvpd_confirm_page_res, video_id, 'Confirming Login', True) + post_form(mvpd_confirm_page_res, 'Confirming Login') else: - # Normal, non-Comcast flow + # Some providers (e.g. DIRECTV NOW) have another meta refresh + # based redirect that should be followed. 
+ provider_redirect_page, urlh = provider_redirect_page_res + provider_refresh_redirect_url = extract_redirect_url( + provider_redirect_page, url=urlh.geturl()) + if provider_refresh_redirect_url: + provider_redirect_page_res = self._download_webpage_handle( + provider_refresh_redirect_url, video_id, + 'Downloading Provider Redirect Page (meta refresh)') provider_login_page_res = post_form( - provider_redirect_page_res, 'Downloading Provider Login Page') + provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE) mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { mso_info.get('username_field', 'username'): username, mso_info.get('password_field', 'password'): password, @@ -1448,6 +1607,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue + if '