From a13c8aad39c26fac72a2ae92d5a8b3b74451f228 Mon Sep 17 00:00:00 2001 From: nindogo Date: Sun, 14 Apr 2019 15:07:25 +0300 Subject: [PATCH 01/16] Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/porntrex.py | 134 +++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 youtube_dl/extractor/porntrex.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cc19af5c4..c06d8dfa0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -901,6 +901,10 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE +from .porntrex import ( + PornTrexIE, + PornTrexPlayListIE, +) from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py new file mode 100644 index 000000000..e8e1cf0f5 --- /dev/null +++ b/youtube_dl/extractor/porntrex.py @@ -0,0 +1,134 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + urlencode_postdata, + ExtractorError, +) + + +class PornTrexIE(InfoExtractor): + _NETRC_MACHINE = 'porntrex' + _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' + _TEST = { + 'url': 'https://www.porntrex.com/video/519351/be-ariyana-adin-breaking-and-entering-this-pussy', + 'info_dict': { + 'id': '519351', + 'ext': 'mp4', + 'title': 'BE - Ariyana Adin - Breaking And Entering This Pussy', + 'uploader': 'brand95', + 'description': 'BE - Ariyana Adin - Breaking And Entering This Pussy', + } + } + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + 'https://www.porntrex.com/login/', None, 'Downloading login page') + + login_form = 
self._hidden_inputs(login_page) + + login_form.update({ + 'username': username.encode('utf-8'), + 'pass': password.encode('utf-8'), + 'remember_me': str(1).encode('utf-8'), + }) + + login_page = self._download_webpage( + 'https://www.porntrex.com/ajax-login/', None, + note='Logging in', + data=urlencode_postdata(login_form)) + + if re.search(r'generic-error hidden', login_page): + raise ExtractorError( + 'Unable to login, incorrect username and/or password', + expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + private_string = 'Only active members can watch private videos.' + is_video_private_regex = re.compile(private_string) + if re.findall(is_video_private_regex, webpage): + self.raise_login_required() + + title = self._html_search_regex( + r'(.+?)', webpage, 'title',) + url2_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") + url2 = re.findall(url2_regex, webpage) + uploader_regex = re.compile( + r'(.+?)', + re.DOTALL) + uploader = re.findall(uploader_regex, webpage)[0].strip() + thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') + thumbnails_list = re.findall(thumbnails_regex, webpage) + thumbnails = [] + for thumbs in thumbnails_list: + thumbnails.append({'url': thumbs}) + formats = [] + for x, _ in enumerate(url2): + formats.append({'url': url2[x], + 'ext': url2[x].split('.')[-1], + 'protocol': url2[x].split(':')[0], + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'thumbnails': thumbnails, + 'formats': formats, + } + + +class PornTrexPlayListIE(InfoExtractor): + _NETRC_MACHINE = 'porntrex' + _VALID_URL = \ + r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' + _TEST = { + 'url': 'https://www.porntrex.com/playlists/60671/les45/', + 'info_dict': { + 'id': '477697', + 
'ext': 'mp4', + 'uploader': 'tarpi', + 'title': '4. Kelly Divine, Tiffany Minx (1080p)', + 'description': '4. Kelly Divine, Tiffany Minx (1080p)' + } + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + get_all_urls_regex = re.compile('data-playlist-item="(.*?)"') + all_urls = re.findall(get_all_urls_regex, webpage) + + entries = [] + for this_url in all_urls: + entries.append({'_type': 'url', + 'id': 'PornTrex', + 'url': this_url, + }) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': self._html_search_regex( + r'(.+?)', + webpage, + 'title',), + 'entries': entries, + } From e7a76dde029f52d30095272f10bd7cfccad1db6b Mon Sep 17 00:00:00 2001 From: nindogo Date: Mon, 15 Apr 2019 23:40:05 +0300 Subject: [PATCH 02/16] Added flashvars regex --- youtube_dl/extractor/porntrex.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index e8e1cf0f5..98ed965e9 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor from ..utils import ( @@ -84,6 +85,16 @@ class PornTrexIE(InfoExtractor): self._sort_formats(formats) + flashvars_regex = re.compile( + r'flashvars.*?(\{.*?\})', + flags=re.DOTALL + ) + + flashvars = json.loads( + re.findall(flashvars_regex, webpage)[0] + ) + print(flashvars) + return { 'id': video_id, 'title': title, From db23d518968773842904c832b18d9a3affdad796 Mon Sep 17 00:00:00 2001 From: nindogo Date: Thu, 18 Apr 2019 16:27:19 +0300 Subject: [PATCH 03/16] Clean up of code before pull request. 
--- youtube_dl/extractor/porntrex.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 98ed965e9..db3af70a8 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -15,13 +14,14 @@ class PornTrexIE(InfoExtractor): _NETRC_MACHINE = 'porntrex' _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' _TEST = { - 'url': 'https://www.porntrex.com/video/519351/be-ariyana-adin-breaking-and-entering-this-pussy', + 'url': 'https://www.porntrex.com/video/350451/naomi-woods-the-list', 'info_dict': { - 'id': '519351', + 'id': '350451', 'ext': 'mp4', - 'title': 'BE - Ariyana Adin - Breaking And Entering This Pussy', - 'uploader': 'brand95', - 'description': 'BE - Ariyana Adin - Breaking And Entering This Pussy', + 'title': 'Naomi Woods - The List in 4k', + 'uploader': 'delman', + 'description': 'Naomi Woods The List', + 'url': 'https://www.porntrex.com/get_file/7/5223e8c2d6a378f22eccc8fd8e881746005ebc9d40/350000/350451/350451_2160p.mp4' } } @@ -82,19 +82,8 @@ class PornTrexIE(InfoExtractor): 'ext': url2[x].split('.')[-1], 'protocol': url2[x].split(':')[0], }) - self._sort_formats(formats) - flashvars_regex = re.compile( - r'flashvars.*?(\{.*?\})', - flags=re.DOTALL - ) - - flashvars = json.loads( - re.findall(flashvars_regex, webpage)[0] - ) - print(flashvars) - return { 'id': video_id, 'title': title, @@ -130,7 +119,7 @@ class PornTrexPlayListIE(InfoExtractor): entries = [] for this_url in all_urls: entries.append({'_type': 'url', - 'id': 'PornTrex', + 'id': this_url.split('/')[4], 'url': this_url, }) From 331ebfc375187fd02e02f02d513d51fef88b83fb Mon Sep 17 00:00:00 2001 From: nindogo Date: Thu, 18 Apr 2019 23:51:49 +0300 Subject: [PATCH 04/16] Added login and password to playlist class. 
Added login and password to playlist class. Also added a new playlist for the test of the playlist class. I do not know how to test the credentials in youtube-dl yet so needed a class that did not need username and password and that had all its videos available for access. --- youtube_dl/extractor/porntrex.py | 43 +++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index db3af70a8..17c9671a2 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -99,16 +99,47 @@ class PornTrexPlayListIE(InfoExtractor): _VALID_URL = \ r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' _TEST = { - 'url': 'https://www.porntrex.com/playlists/60671/les45/', + 'url': 'https://www.porntrex.com/playlists/31075/2016-collection/', + 'id': '31075', + 'title': 'FTVGirls 2016 Collection', 'info_dict': { - 'id': '477697', + 'id': '345462', 'ext': 'mp4', - 'uploader': 'tarpi', - 'title': '4. Kelly Divine, Tiffany Minx (1080p)', - 'description': '4. 
Kelly Divine, Tiffany Minx (1080p)' + 'uploader': 'publicgirls', + 'title': 'FTVGirls.16.05 - Adria Part 2', + 'description': 'https://www.indexxx.com/models/121033/adria-rae/', } } + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + 'https://www.porntrex.com/login/', None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'username': username.encode('utf-8'), + 'pass': password.encode('utf-8'), + 'remember_me': str(1).encode('utf-8'), + }) + + login_page = self._download_webpage( + 'https://www.porntrex.com/ajax-login/', None, + note='Logging in', + data=urlencode_postdata(login_form)) + + if re.search(r'generic-error hidden', login_page): + raise ExtractorError( + 'Unable to login, incorrect username and/or password', + expected=True) + + def _real_initialize(self): + self._login() + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) @@ -125,7 +156,7 @@ class PornTrexPlayListIE(InfoExtractor): return { '_type': 'playlist', - 'id': playlist_id, + 'id': url.split('/')[4], 'title': self._html_search_regex( r'(.+?)', webpage, From 201096182d91d5b2580be1038e871d951957f114 Mon Sep 17 00:00:00 2001 From: nindogo Date: Sat, 20 Apr 2019 01:30:34 +0300 Subject: [PATCH 05/16] Slight clean up. Some clean ups to remove overly specific url that appears to change daily. 
--- youtube_dl/extractor/porntrex.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 17c9671a2..889c24fec 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -21,7 +21,6 @@ class PornTrexIE(InfoExtractor): 'title': 'Naomi Woods - The List in 4k', 'uploader': 'delman', 'description': 'Naomi Woods The List', - 'url': 'https://www.porntrex.com/get_file/7/5223e8c2d6a378f22eccc8fd8e881746005ebc9d40/350000/350451/350451_2160p.mp4' } } From 9281a8547c3151f144c9024a7be7e8fb2facc7e4 Mon Sep 17 00:00:00 2001 From: nindogo Date: Sat, 20 Apr 2019 21:52:26 +0300 Subject: [PATCH 06/16] Added extraction message to URL Extractor. It looks better when the extractor informs the user that it is looking at the file for data. --- youtube_dl/extractor/porntrex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 889c24fec..e253510d9 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -57,6 +57,7 @@ class PornTrexIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + self.report_extraction(video_id) private_string = 'Only active members can watch private videos.' is_video_private_regex = re.compile(private_string) if re.findall(is_video_private_regex, webpage): From 6d0f924cbf7a9ce668eca000bd6d89207b073c4c Mon Sep 17 00:00:00 2001 From: nindogo Date: Sun, 21 Apr 2019 23:06:54 +0300 Subject: [PATCH 07/16] Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. 
_real_initialize --- youtube_dl/extractor/porntrex.py | 60 +++++++++----------------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index e253510d9..085ded422 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -10,19 +10,7 @@ from ..utils import ( ) -class PornTrexIE(InfoExtractor): - _NETRC_MACHINE = 'porntrex' - _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' - _TEST = { - 'url': 'https://www.porntrex.com/video/350451/naomi-woods-the-list', - 'info_dict': { - 'id': '350451', - 'ext': 'mp4', - 'title': 'Naomi Woods - The List in 4k', - 'uploader': 'delman', - 'description': 'Naomi Woods The List', - } - } +class PornTrexBaseIE(InfoExtractor): def _login(self): username, password = self._get_login_info() @@ -53,6 +41,21 @@ class PornTrexIE(InfoExtractor): def _real_initialize(self): self._login() + +class PornTrexIE(PornTrexBaseIE): + _NETRC_MACHINE = 'porntrex' + _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' + _TEST = { + 'url': 'https://www.porntrex.com/video/350451/naomi-woods-the-list', + 'info_dict': { + 'id': '350451', + 'ext': 'mp4', + 'title': 'Naomi Woods - The List in 4k', + 'uploader': 'delman', + 'description': 'Naomi Woods The List', + } + } + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -94,7 +97,7 @@ class PornTrexIE(InfoExtractor): } -class PornTrexPlayListIE(InfoExtractor): +class PornTrexPlayListIE(PornTrexBaseIE): _NETRC_MACHINE = 'porntrex' _VALID_URL = \ r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' @@ -111,35 +114,6 @@ class PornTrexPlayListIE(InfoExtractor): } } - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_page = self._download_webpage( - 'https://www.porntrex.com/login/', None, 'Downloading login page') - - login_form = 
self._hidden_inputs(login_page) - - login_form.update({ - 'username': username.encode('utf-8'), - 'pass': password.encode('utf-8'), - 'remember_me': str(1).encode('utf-8'), - }) - - login_page = self._download_webpage( - 'https://www.porntrex.com/ajax-login/', None, - note='Logging in', - data=urlencode_postdata(login_form)) - - if re.search(r'generic-error hidden', login_page): - raise ExtractorError( - 'Unable to login, incorrect username and/or password', - expected=True) - - def _real_initialize(self): - self._login() - def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) From bbf0342afd2797f006c58d75c211365f7796ad99 Mon Sep 17 00:00:00 2001 From: nindogo Date: Tue, 30 Apr 2019 04:13:32 +0300 Subject: [PATCH 08/16] Change test case for PornTrexIE The video that we used to test the previous version of porntrex.py is no longer on the website. This led to failures in the tests. Here we have a corrected version with working URL. 
--- youtube_dl/extractor/porntrex.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 085ded422..b4274c1d2 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -46,13 +46,13 @@ class PornTrexIE(PornTrexBaseIE): _NETRC_MACHINE = 'porntrex' _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' _TEST = { - 'url': 'https://www.porntrex.com/video/350451/naomi-woods-the-list', + 'url': 'https://www.porntrex.com/video/311136/naomi-gets-fingered-before-the-fucking', 'info_dict': { - 'id': '350451', + 'id': '311136', 'ext': 'mp4', - 'title': 'Naomi Woods - The List in 4k', - 'uploader': 'delman', - 'description': 'Naomi Woods The List', + 'title': 'Naomi gets fingered before the fucking', + 'uploader': 'cumberland', + 'description': 'Sexy brunette babe likes to get her tight cunt slammed in hardcore fashion.', } } From d3a6216e60d0abd83fde746669b2794ea46323c6 Mon Sep 17 00:00:00 2001 From: nindogo Date: Sun, 12 May 2019 03:00:20 +0300 Subject: [PATCH 09/16] Applied changes requested by maintainers. Four changes made as requested. 
--- youtube_dl/extractor/porntrex.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index b4274c1d2..7e159a2c7 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -68,22 +68,20 @@ class PornTrexIE(PornTrexBaseIE): title = self._html_search_regex( r'(.+?)', webpage, 'title',) - url2_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") - url2 = re.findall(url2_regex, webpage) - uploader_regex = re.compile( - r'(.+?)', - re.DOTALL) - uploader = re.findall(uploader_regex, webpage)[0].strip() + movie_urls_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") + movie_urls = re.findall(movie_urls_regex, webpage) + uploader = self._search_regex(r'/members/\d+?/["\']>\s+(.+?)\s+', webpage, 'new_uploader').strip() thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') thumbnails_list = re.findall(thumbnails_regex, webpage) thumbnails = [] for thumbs in thumbnails_list: thumbnails.append({'url': thumbs}) formats = [] - for x, _ in enumerate(url2): - formats.append({'url': url2[x], - 'ext': url2[x].split('.')[-1], - 'protocol': url2[x].split(':')[0], + for movie_url in movie_urls: + formats.append({'url': movie_url, + 'ext': movie_url.split('.')[-1], + 'protocol': movie_url.split(':')[0], + 'height': int(self._search_regex(r'_(\d+)p.', movie_url.split('/')[8], 'height', default='480')), }) self._sort_formats(formats) @@ -94,6 +92,7 @@ class PornTrexIE(PornTrexBaseIE): 'uploader': uploader, 'thumbnails': thumbnails, 'formats': formats, + 'age_limit': 18, } From 54db14c4eaea8caf8e43d67b12691bcd759516c3 Mon Sep 17 00:00:00 2001 From: nindogo Date: Wed, 15 May 2019 01:35:21 +0300 Subject: [PATCH 10/16] Small changes done to make things cleaner. Hopefully also make things pythonic. 
--- youtube_dl/extractor/porntrex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 7e159a2c7..f3788526e 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -81,7 +81,7 @@ class PornTrexIE(PornTrexBaseIE): formats.append({'url': movie_url, 'ext': movie_url.split('.')[-1], 'protocol': movie_url.split(':')[0], - 'height': int(self._search_regex(r'_(\d+)p.', movie_url.split('/')[8], 'height', default='480')), + 'height': int(self._search_regex(r'_(\d+)p.', movie_url.split('/')[-1], 'height', default='480')), }) self._sort_formats(formats) @@ -129,7 +129,7 @@ class PornTrexPlayListIE(PornTrexBaseIE): return { '_type': 'playlist', - 'id': url.split('/')[4], + 'id': playlist_id, 'title': self._html_search_regex( r'(.+?)', webpage, From d861c752f66ec31bbebbdb531dcd08da011fbef2 Mon Sep 17 00:00:00 2001 From: nindogo Date: Wed, 15 May 2019 01:54:59 +0300 Subject: [PATCH 11/16] Debug on test cases. --- youtube_dl/extractor/porntrex.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index f3788526e..1fa436e9d 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -53,6 +53,7 @@ class PornTrexIE(PornTrexBaseIE): 'title': 'Naomi gets fingered before the fucking', 'uploader': 'cumberland', 'description': 'Sexy brunette babe likes to get her tight cunt slammed in hardcore fashion.', + 'age_limit': 18, } } @@ -110,6 +111,7 @@ class PornTrexPlayListIE(PornTrexBaseIE): 'uploader': 'publicgirls', 'title': 'FTVGirls.16.05 - Adria Part 2', 'description': 'https://www.indexxx.com/models/121033/adria-rae/', + 'age_limit': 18, } } From fe2344fd8e035712bf41ffa76f71bd41a088559e Mon Sep 17 00:00:00 2001 From: nindogo Date: Mon, 24 Jun 2019 04:25:30 +0300 Subject: [PATCH 12/16] Changes to close pending fixes. 
Changes done to close pending fixes raised by youtube_dl maintainer. --- youtube_dl/extractor/porntrex.py | 101 ++++++++++++++++--------------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 1fa436e9d..01498f479 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -11,6 +11,7 @@ from ..utils import ( class PornTrexBaseIE(InfoExtractor): + _NETRC_MACHINE = 'porntrex' def _login(self): username, password = self._get_login_info() @@ -23,9 +24,9 @@ class PornTrexBaseIE(InfoExtractor): login_form = self._hidden_inputs(login_page) login_form.update({ - 'username': username.encode('utf-8'), - 'pass': password.encode('utf-8'), - 'remember_me': str(1).encode('utf-8'), + 'username': username, + 'pass': password, + 'remember_me': 1, }) login_page = self._download_webpage( @@ -43,7 +44,6 @@ class PornTrexBaseIE(InfoExtractor): class PornTrexIE(PornTrexBaseIE): - _NETRC_MACHINE = 'porntrex' _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P[0-9]+)/' _TEST = { 'url': 'https://www.porntrex.com/video/311136/naomi-gets-fingered-before-the-fucking', @@ -61,28 +61,36 @@ class PornTrexIE(PornTrexBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - private_string = 'Only active members can watch private videos.' 
- is_video_private_regex = re.compile(private_string) - if re.findall(is_video_private_regex, webpage): + # print(self._html_search_meta('description', webpage, 'description', fatal=False)) + # print(self._og_search_description(webpage)) + # quit() + + if re.findall(r'Only active members can watch private videos.', webpage): self.raise_login_required() - title = self._html_search_regex( - r'(.+?)', webpage, 'title',) - movie_urls_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'") - movie_urls = re.findall(movie_urls_regex, webpage) - uploader = self._search_regex(r'/members/\d+?/["\']>\s+(.+?)\s+', webpage, 'new_uploader').strip() - thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"') - thumbnails_list = re.findall(thumbnails_regex, webpage) + title = self._html_search_regex(r'(.+?)', + webpage, + 'title', + fatal=False) + + uploader = self._search_regex(r'(?m)/members/\d+?/["\']>\s+(.+?)\s+', + webpage, + 'new_uploader', + fatal=False).strip() + + thumbnails_list = re.findall(r'href="(http.*?/screenshots/\d+.jpg/)["\']', webpage) thumbnails = [] for thumbs in thumbnails_list: thumbnails.append({'url': thumbs}) + formats = [] + movie_urls = re.findall(r"['\"](https://www.porntrex.com/get_file/.*?)/['\"]", webpage) for movie_url in movie_urls: formats.append({'url': movie_url, - 'ext': movie_url.split('.')[-1], - 'protocol': movie_url.split(':')[0], - 'height': int(self._search_regex(r'_(\d+)p.', movie_url.split('/')[-1], 'height', default='480')), + 'height': int(self._search_regex(r'_(\d+)p\.', + movie_url, + 'height', + default='480')), }) self._sort_formats(formats) @@ -98,43 +106,40 @@ class PornTrexIE(PornTrexBaseIE): class PornTrexPlayListIE(PornTrexBaseIE): - _NETRC_MACHINE = 'porntrex' - _VALID_URL = \ - r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' - _TEST = { - 'url': 'https://www.porntrex.com/playlists/31075/2016-collection/', - 'id': '31075', - 'title': 'FTVGirls 2016 Collection', + _VALID_URL = 
r'https?://(?:www\.)?porntrex\.com/playlists/(?P[0-9]+)/' + _TESTS = [{ + 'url': 'https://www.porntrex.com/playlists/13598/tushy32/', 'info_dict': { - 'id': '345462', - 'ext': 'mp4', - 'uploader': 'publicgirls', - 'title': 'FTVGirls.16.05 - Adria Part 2', - 'description': 'https://www.indexxx.com/models/121033/adria-rae/', - 'age_limit': 18, - } - } + 'id': '13598', + 'title': 'Tushy', + 'description': 'Huge collection of free hd porn videos. Tons of amateur sex and professional hd movies. Daily updated videos of hot busty teen, latina, amateur & more...', + }, + 'playlist_mincount': 74, + }, { + 'url': 'https://www.porntrex.com/playlists/31075/2016-collection/', + 'info_dict': { + 'id': '31075', + 'title': 'FTVGirls 2016 Collection', + 'description': 'FTVGirls 2016 Complete Collection (122 videos)', + }, + 'playlist_mincount': 3, + }] def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - get_all_urls_regex = re.compile('data-playlist-item="(.*?)"') - all_urls = re.findall(get_all_urls_regex, webpage) + all_urls = re.findall(r'data-playlist-item="(.*?)"', webpage) entries = [] for this_url in all_urls: - entries.append({'_type': 'url', - 'id': this_url.split('/')[4], - 'url': this_url, - }) + entries.append(self.url_result(this_url)) - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': self._html_search_regex( - r'(.+?)', - webpage, - 'title',), - 'entries': entries, - } + playlist_description = self._html_search_meta('description', webpage, 'description', fatal=False) + + playlist_title = self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False) + + return self.playlist_result(entries, + playlist_id=playlist_id, + playlist_title=playlist_title, + playlist_description=playlist_description) From 5c27f8874585a8d784c9fde97ea0157272785221 Mon Sep 17 00:00:00 2001 From: nindogo Date: Mon, 24 Jun 2019 04:34:39 +0300 Subject: [PATCH 13/16] Slight cleanups. 
--- youtube_dl/extractor/porntrex.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 01498f479..944b6d022 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -61,10 +61,6 @@ class PornTrexIE(PornTrexBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # print(self._html_search_meta('description', webpage, 'description', fatal=False)) - # print(self._og_search_description(webpage)) - # quit() - if re.findall(r'Only active members can watch private videos.', webpage): self.raise_login_required() From 8901527aae59f8fa63b881c3125bc2532a9c1ffd Mon Sep 17 00:00:00 2001 From: nindogo Date: Thu, 27 Jun 2019 10:42:32 +0300 Subject: [PATCH 14/16] Clean up regex as advised in review. Changed regex to reduce the chance of getting string if it is an empty string. --- youtube_dl/extractor/porntrex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 944b6d022..46dbd9a09 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -125,7 +125,7 @@ class PornTrexPlayListIE(PornTrexBaseIE): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - all_urls = re.findall(r'data-playlist-item="(.*?)"', webpage) + all_urls = re.findall(r'data-playlist-item=["\'](https?://www\.porntrex\.com/[^\'"]+)', webpage) entries = [] for this_url in all_urls: From 88bf7eb9bb9cbb4a75d167e45471c00f6104e565 Mon Sep 17 00:00:00 2001 From: nindogo Date: Thu, 27 Jun 2019 11:08:22 +0300 Subject: [PATCH 15/16] Slight changes to enable scanning for videos in pages on logging in. 
--- youtube_dl/extractor/porntrex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index 46dbd9a09..cf7442b21 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -80,7 +80,7 @@ class PornTrexIE(PornTrexBaseIE): thumbnails.append({'url': thumbs}) formats = [] - movie_urls = re.findall(r"['\"](https://www.porntrex.com/get_file/.*?)/['\"]", webpage) + movie_urls = re.findall(r"'(https://www.porntrex.com/get_file/.*?)/'", webpage) for movie_url in movie_urls: formats.append({'url': movie_url, 'height': int(self._search_regex(r'_(\d+)p\.', From 54e063db97644909f1c1c548dd071076dad39b43 Mon Sep 17 00:00:00 2001 From: nindogo Date: Thu, 27 Jun 2019 11:27:40 +0300 Subject: [PATCH 16/16] A few more clean ups to porntrex.py. --- youtube_dl/extractor/porntrex.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porntrex.py b/youtube_dl/extractor/porntrex.py index cf7442b21..75e780eb7 100644 --- a/youtube_dl/extractor/porntrex.py +++ b/youtube_dl/extractor/porntrex.py @@ -80,8 +80,10 @@ class PornTrexIE(PornTrexBaseIE): thumbnails.append({'url': thumbs}) formats = [] - movie_urls = re.findall(r"'(https://www.porntrex.com/get_file/.*?)/'", webpage) + movie_urls = re.findall(r"[\"'](https://www.porntrex.com/get_file/.*?)/[\"']", webpage) for movie_url in movie_urls: + if re.search(r'screenshots', movie_url): + continue formats.append({'url': movie_url, 'height': int(self._search_regex(r'_(\d+)p\.', movie_url,