Merge 1b0793fbc8 into 7d740e7dc7

2020-10-21 17:17:54 +02:00 · 2020-10-21 17:17:54 +02:00 · 12e48e36d2
parent 7d740e7dc7 1b0793fbc8
commit 12e48e36d2
1 changed files with 69 additions and 7 deletions
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -23,10 +23,14 @@ from ..utils import (
    remove_quotes,
    str_to_int,
    url_or_none,
+    urlencode_postdata,
 )


 class PornHubBaseIE(InfoExtractor):
+
+    _NETRC_MACHINE = 'pornhub'  # or 'pornhubpremium'
+
    def _download_webpage_handle(self, *args, **kwargs):
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@ -47,6 +51,61 @@ class PornHubBaseIE(InfoExtractor):

        return webpage, urlh

+    def _login_if_required(self, host):
+        login_info = self._get_login_info(netrc_machine=host.split('.')[0])
+
+        # PornHub Premium requires some kind of authentication
+        if 'premium' in host:
+            cookie_file = self._downloader.params.get('cookiefile')
+            if not cookie_file and not all(login_info):
+                raise ExtractorError(
+                    'PornHub Premium requires authentication.'
+                    ' You may want to use --cookies or --netrc.',
+                    expected=True)
+
+        if all(login_info):
+            self._login(host, login_info)
+
+    def _login(self, host, login_info):
+        username = login_info[0]
+        password = login_info[1]
+
+        if 'premium' in host:
+            login_form_url = 'https://%s/premium/login' % host
+        else:
+            login_form_url = 'https://%s/login' % host
+
+        login_post_url = 'https://www.%s/front/authenticate' % host
+
+        # Fetch login page
+        login_page = self._download_webpage(
+            login_form_url, video_id=None, note='Fetching login page', tries=3, fatal=True)
+
+        # Fetch login form
+        login_form = self._hidden_inputs(login_page)
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        # Submit sign-in request
+        response = self._download_json(
+            login_post_url, video_id=None, note='Logging in to %s' % host, fatal=True,
+            data=urlencode_postdata(login_form), headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': login_form_url,
+            })
+
+        # Success
+        if response.get('success') == '1':
+            return self.to_screen("Successfully authenticated")
+
+        # Error
+        login_error = response.get('message')
+        if login_error:
+            raise ExtractorError('Unable to login: %s' % login_error, expected=True)
+        self.report_warning('Login has probably failed')
+

 class PornHubIE(PornHubBaseIE):
    IE_DESC = 'PornHub and Thumbzilla'
@ -172,15 +231,10 @@ class PornHubIE(PornHubBaseIE):
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')

-        if 'premium' in host:
-            if not self._downloader.params.get('cookiefile'):
-                raise ExtractorError(
-                    'PornHub Premium requires authentication.'
-                    ' You may want to use --cookies.',
-                    expected=True)
-
        self._set_cookie(host, 'age_verified', '1')

+        self._login_if_required(host)
+
        def dl_webpage(platform):
            self._set_cookie(host, 'platform', platform)
            return self._download_webpage(
@ -405,6 +459,8 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
        host = mobj.group('host')
        playlist_id = mobj.group('id')

+        self._login_if_required(host)
+
        webpage = self._download_webpage(url, playlist_id)

        entries = self._extract_entries(webpage, host)
@ -445,7 +501,11 @@ class PornHubUserIE(PornHubPlaylistBaseIE):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
        user_id = mobj.group('id')
+
+        self._login_if_required(host)
+
        return self.url_result(
            '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
            video_id=user_id)
@ -466,6 +526,8 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
        host = mobj.group('host')
        item_id = mobj.group('id')

+        self._login_if_required(host)
+
        page = int_or_none(self._search_regex(
            r'\bpage=(\d+)', url, 'page', default=None))