From 24fa01bb5cabd878c80d2664b4ee4f2546a3a969 Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Sat, 7 Mar 2020 21:17:50 -0800 Subject: [PATCH 1/5] Added login support for PornHub and PornHub Premium. The pornhub extractor has been updated with support for --netrc and --username/password authentication. This change allows authenticated users to archive content they have purchased. --- youtube_dl/extractor/pornhub.py | 79 ++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3567a3283..60f97a203 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -22,6 +22,7 @@ from ..utils import ( remove_quotes, str_to_int, url_or_none, + urlencode_postdata, ) @@ -46,6 +47,63 @@ class PornHubBaseIE(InfoExtractor): return webpage, urlh + def _login_if_required(self, host): + login_info = self._get_login_info(netrc_machine=host.split('.')[0]) + + # PornHub Premium requires some kind of authentication + if 'premium' in host: + cookie_file = self._downloader.params.get('cookiefile') + if not cookie_file and not all(login_info): + raise ExtractorError( + 'PornHub Premium requires authentication.' 
+ ' You may want to use --cookies or --netrc.', + expected=True) + + # Authenticate, if required + cookies = self._get_cookies('https://%s' % host) + if all(login_info) and not cookies: + self._login(host, login_info) + + def _login(self, host, login_info): + username = login_info[0] + password = login_info[1] + + if 'premium' in host: + login_form_url = 'https://%s/premium/login' % host + login_post_url = 'https://www.%s/front/authenticate' % host + else: + login_form_url = 'https://%s/login' % host + login_post_url = 'https://www.%s/front/authenticate' % host + + # Fetch login page + login_page = self._download_webpage( + login_form_url, video_id=None, note='Fetching login page', tries=3, fatal=True) + + # Fetch login form + login_form = self._hidden_inputs(login_page) + login_form.update({ + 'username': username, + 'password': password, + }) + + # Submit sign-in request + response = self._download_json( + login_post_url, video_id=None, note='Logging in to %s' % host, fatal=True, + data=urlencode_postdata(login_form), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': login_form_url, + }) + + # Success + if response.get('success') == '1': + return self.to_screen("Successfully authenticated") + + # Error + login_error = response.get('message') + if login_error: + raise ExtractorError('Unable to login: %s' % login_error, expected=True) + self.report_warning('Login has probably failed') + class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' @@ -169,15 +227,11 @@ class PornHubIE(PornHubBaseIE): host = mobj.group('host') or 'pornhub.com' video_id = mobj.group('id') - if 'premium' in host: - if not self._downloader.params.get('cookiefile'): - raise ExtractorError( - 'PornHub Premium requires authentication.' 
- ' You may want to use --cookies.', - expected=True) - self._set_cookie(host, 'age_verified', '1') + # Authenticate, if required + self._login_if_required(host) + def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( @@ -398,6 +452,9 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): host = mobj.group('host') playlist_id = mobj.group('id') + # Authenticate, if required + self._login_if_required(host) + webpage = self._download_webpage(url, playlist_id) entries = self._extract_entries(webpage, host) @@ -438,7 +495,12 @@ class PornHubUserIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') user_id = mobj.group('id') + + # Authenticate, if required + self._login_if_required(host) + return self.url_result( '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) @@ -459,6 +521,9 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') item_id = mobj.group('id') + # Authenticate, if required + self._login_if_required(host) + page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) From 93e281288f535e08a20e3678bb1a8493bfcb5300 Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Sun, 8 Mar 2020 12:53:33 -0700 Subject: [PATCH 2/5] Added default _NETRC_MACHINE value to make tests happy. 
--- youtube_dl/extractor/pornhub.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 60f97a203..0153044d8 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -27,6 +27,9 @@ from ..utils import ( class PornHubBaseIE(InfoExtractor): + + _NETRC_MACHINE = 'pornhub' # or 'pornhubpremium' + def _download_webpage_handle(self, *args, **kwargs): def dl(*args, **kwargs): return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) From a052d5a81dfe31d8a35fb0166c8d20c65df75dd3 Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Wed, 11 Mar 2020 16:22:26 -0700 Subject: [PATCH 3/5] Address review feedback: assign login_post_url once and drop a redundant comment --- youtube_dl/extractor/pornhub.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0153044d8..4dd6ad883 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -62,7 +62,6 @@ class PornHubBaseIE(InfoExtractor): ' You may want to use --cookies or --netrc.', expected=True) - # Authenticate, if required cookies = self._get_cookies('https://%s' % host) if all(login_info) and not cookies: self._login(host, login_info) @@ -73,10 +72,10 @@ class PornHubBaseIE(InfoExtractor): if 'premium' in host: login_form_url = 'https://%s/premium/login' % host - login_post_url = 'https://www.%s/front/authenticate' % host else: login_form_url = 'https://%s/login' % host - login_post_url = 'https://www.%s/front/authenticate' % host + + login_post_url = 'https://www.%s/front/authenticate' % host # Fetch login page login_page = self._download_webpage( From a571ae79d567a1d42ad413eebfd3893b0fc8fec3 Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Wed, 11 Mar 2020 16:26:23 -0700 Subject: [PATCH 4/5] Address review feedback: remove redundant comments above _login_if_required calls --- youtube_dl/extractor/pornhub.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py 
index 4dd6ad883..366d03eeb 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -231,7 +231,6 @@ class PornHubIE(PornHubBaseIE): self._set_cookie(host, 'age_verified', '1') - # Authenticate, if required self._login_if_required(host) def dl_webpage(platform): @@ -454,7 +453,6 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): host = mobj.group('host') playlist_id = mobj.group('id') - # Authenticate, if required self._login_if_required(host) webpage = self._download_webpage(url, playlist_id) @@ -500,7 +498,6 @@ class PornHubUserIE(PornHubPlaylistBaseIE): host = mobj.group('host') user_id = mobj.group('id') - # Authenticate, if required self._login_if_required(host) return self.url_result( @@ -523,7 +520,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') item_id = mobj.group('id') - # Authenticate, if required self._login_if_required(host) page = int_or_none(self._search_regex( From 1b0793fbc88d9f9c7fdbb72422843c6ddd645789 Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Mon, 30 Mar 2020 20:18:46 -0700 Subject: [PATCH 5/5] Fixed login issue by removing cookies check. --- youtube_dl/extractor/pornhub.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 366d03eeb..79205c4d0 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -62,8 +62,7 @@ class PornHubBaseIE(InfoExtractor): ' You may want to use --cookies or --netrc.', expected=True) - cookies = self._get_cookies('https://%s' % host) - if all(login_info) and not cookies: + if all(login_info): self._login(host, login_info) def _login(self, host, login_info):