From 24fa01bb5cabd878c80d2664b4ee4f2546a3a969 Mon Sep 17 00:00:00 2001
From: Tristan Waddington <tristan.waddington@gmail.com>
Date: Sat, 7 Mar 2020 21:17:50 -0800
Subject: [PATCH] Added login support for PornHub and PornHub Premium.

The pornhub extractor has been updated with support for --netrc and
--username/--password authentication. This change allows authenticated
users to archive content they have purchased.
---
 youtube_dl/extractor/pornhub.py | 79 ++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3567a3283..60f97a203 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -22,6 +22,7 @@ from ..utils import (
     remove_quotes,
     str_to_int,
     url_or_none,
+    urlencode_postdata,
 )
 
 
@@ -46,6 +47,63 @@ class PornHubBaseIE(InfoExtractor):
 
         return webpage, urlh
 
+    def _login_if_required(self, host):  # Enforce the premium auth requirement, then log in if credentials were given
+        login_info = self._get_login_info(netrc_machine=host.split('.')[0])  # netrc machine = text before the first dot of host
+
+        # A host containing 'premium' needs either a 'cookiefile' param or a complete login_info
+        if 'premium' in host:
+            cookie_file = self._downloader.params.get('cookiefile')
+            if not cookie_file and not all(login_info):
+                raise ExtractorError(
+                    'PornHub Premium requires authentication.'
+                    ' You may want to use --cookies or --netrc.',
+                    expected=True)
+
+        # Log in only when every login_info field is set and no cookies come back for https://<host>
+        cookies = self._get_cookies('https://%s' % host)
+        if all(login_info) and not cookies:  # NOTE(review): PornHubIE sets 'age_verified' before calling this, so cookies may never be empty there -- confirm
+            self._login(host, login_info)
+
+    def _login(self, host, login_info):  # Submit login_info (username, password) through host's login form; raise ExtractorError on a reported failure
+        username = login_info[0]
+        password = login_info[1]
+
+        if 'premium' in host:
+            login_form_url = 'https://%s/premium/login' % host
+            login_post_url = 'https://www.%s/front/authenticate' % host
+        else:
+            login_form_url = 'https://%s/login' % host
+            login_post_url = 'https://www.%s/front/authenticate' % host  # identical to the premium branch; only the form URL differs
+
+        # Download the login page (fatal on failure) so its hidden form fields can be reused
+        login_page = self._download_webpage(
+            login_form_url, video_id=None, note='Fetching login page', tries=3, fatal=True)
+
+        # Start from the page's hidden inputs, then add the credentials on top
+        login_form = self._hidden_inputs(login_page)
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        # POST the url-encoded form with the form page as Referer; the result is read via _download_json
+        response = self._download_json(
+            login_post_url, video_id=None, note='Logging in to %s' % host, fatal=True,
+            data=urlencode_postdata(login_form), headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': login_form_url,
+            })
+
+        # A 'success' field equal to the string '1' is treated as a successful login
+        if response.get('success') == '1':
+            return self.to_screen("Successfully authenticated")
+
+        # Otherwise raise with the response's 'message' when present; with no message, only warn
+        login_error = response.get('message')
+        if login_error:
+            raise ExtractorError('Unable to login: %s' % login_error, expected=True)
+        self.report_warning('Login has probably failed')
+
 
 class PornHubIE(PornHubBaseIE):
     IE_DESC = 'PornHub and Thumbzilla'
@@ -169,15 +227,11 @@ class PornHubIE(PornHubBaseIE):
         host = mobj.group('host') or 'pornhub.com'
         video_id = mobj.group('id')
 
-        if 'premium' in host:
-            if not self._downloader.params.get('cookiefile'):
-                raise ExtractorError(
-                    'PornHub Premium requires authentication.'
-                    ' You may want to use --cookies.',
-                    expected=True)
-
         self._set_cookie(host, 'age_verified', '1')
 
+        # Authenticate, if required
+        self._login_if_required(host)
+
         def dl_webpage(platform):
             self._set_cookie(host, 'platform', platform)
             return self._download_webpage(
@@ -398,6 +452,9 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
         host = mobj.group('host')
         playlist_id = mobj.group('id')
 
+        # Authenticate, if required
+        self._login_if_required(host)
+
         webpage = self._download_webpage(url, playlist_id)
 
         entries = self._extract_entries(webpage, host)
@@ -438,7 +495,12 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
         user_id = mobj.group('id')
+
+        # Authenticate, if required
+        self._login_if_required(host)
+
         return self.url_result(
             '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
             video_id=user_id)
@@ -459,6 +521,9 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
         host = mobj.group('host')
         item_id = mobj.group('id')
 
+        # Authenticate, if required
+        self._login_if_required(host)
+
         page = int_or_none(self._search_regex(
             r'\bpage=(\d+)', url, 'page', default=None))