Added login support for PornHub and PornHub Premium.

The pornhub extractor has been updated with support for --netrc and --username/password authentication. This change allows authenticated users to archive content they have purchased.
2024-11-29 19:47:54 +01:00 · 2020-03-07 21:17:50 -08:00 · 2020-03-07 21:17:50 -08:00 · 24fa01bb5c
commit 24fa01bb5c
parent 049c0486bb
1 changed files with 72 additions and 7 deletions
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -22,6 +22,7 @@ from ..utils import (
    remove_quotes,
    str_to_int,
    url_or_none,
+    urlencode_postdata,
 )


@ -46,6 +47,63 @@ class PornHubBaseIE(InfoExtractor):

        return webpage, urlh

+    def _login_if_required(self, host):
+        login_info = self._get_login_info(netrc_machine=host.split('.')[0])
+
+        # PornHub Premium requires some kind of authentication
+        if 'premium' in host:
+            cookie_file = self._downloader.params.get('cookiefile')
+            if not cookie_file and not all(login_info):
+                raise ExtractorError(
+                    'PornHub Premium requires authentication.'
+                    ' You may want to use --cookies or --netrc.',
+                    expected=True)
+
+        # Authenticate, if required
+        cookies = self._get_cookies('https://%s' % host)
+        if all(login_info) and not cookies:
+            self._login(host, login_info)
+
+    def _login(self, host, login_info):
+        username = login_info[0]
+        password = login_info[1]
+
+        if 'premium' in host:
+            login_form_url = 'https://%s/premium/login' % host
+            login_post_url = 'https://www.%s/front/authenticate' % host
+        else:
+            login_form_url = 'https://%s/login' % host
+            login_post_url = 'https://www.%s/front/authenticate' % host
+
+        # Fetch login page
+        login_page = self._download_webpage(
+            login_form_url, video_id=None, note='Fetching login page', tries=3, fatal=True)
+
+        # Fetch login form
+        login_form = self._hidden_inputs(login_page)
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        # Submit sign-in request
+        response = self._download_json(
+            login_post_url, video_id=None, note='Logging in to %s' % host, fatal=True,
+            data=urlencode_postdata(login_form), headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': login_form_url,
+            })
+
+        # Success
+        if response.get('success') == '1':
+            return self.to_screen("Successfully authenticated")
+
+        # Error
+        login_error = response.get('message')
+        if login_error:
+            raise ExtractorError('Unable to login: %s' % login_error, expected=True)
+        self.report_warning('Login has probably failed')
+

 class PornHubIE(PornHubBaseIE):
    IE_DESC = 'PornHub and Thumbzilla'
@ -169,15 +227,11 @@ class PornHubIE(PornHubBaseIE):
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')

-        if 'premium' in host:
-            if not self._downloader.params.get('cookiefile'):
-                raise ExtractorError(
-                    'PornHub Premium requires authentication.'
-                    ' You may want to use --cookies.',
-                    expected=True)
-
        self._set_cookie(host, 'age_verified', '1')

+        # Authenticate, if required
+        self._login_if_required(host)
+
        def dl_webpage(platform):
            self._set_cookie(host, 'platform', platform)
            return self._download_webpage(
@ -398,6 +452,9 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
        host = mobj.group('host')
        playlist_id = mobj.group('id')

+        # Authenticate, if required
+        self._login_if_required(host)
+
        webpage = self._download_webpage(url, playlist_id)

        entries = self._extract_entries(webpage, host)
@ -438,7 +495,12 @@ class PornHubUserIE(PornHubPlaylistBaseIE):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
        user_id = mobj.group('id')
+
+        # Authenticate, if required
+        self._login_if_required(host)
+
        return self.url_result(
            '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
            video_id=user_id)
@ -459,6 +521,9 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
        host = mobj.group('host')
        item_id = mobj.group('id')

+        # Authenticate, if required
+        self._login_if_required(host)
+
        page = int_or_none(self._search_regex(
            r'\bpage=(\d+)', url, 'page', default=None))