youtube-dl/youtube_dl/extractor/porntrex.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    urlencode_postdata,
    ExtractorError,
)


class PornTrexBaseIE(InfoExtractor):
    """Common base for the porntrex.com extractors; provides the login step."""

    def _login(self):
        """Log in to porntrex.com when credentials are available.

        Returns silently when no username is configured.  Otherwise the
        hidden inputs of the login page are combined with the credentials
        and posted to the AJAX login endpoint.

        Raises ExtractorError (expected) when the response contains the
        failure marker checked below.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # Start from the hidden inputs of the login form so that whatever
        # the page embeds there is sent back along with the credentials.
        form_fields = self._hidden_inputs(self._download_webpage(
            'https://www.porntrex.com/login/', None,
            note='Downloading login page'))
        form_fields['username'] = username.encode('utf-8')
        form_fields['pass'] = password.encode('utf-8')
        form_fields['remember_me'] = str(1).encode('utf-8')

        response = self._download_webpage(
            'https://www.porntrex.com/ajax-login/', None,
            note='Logging in',
            data=urlencode_postdata(form_fields))

        # NOTE(review): the presence of this marker is treated as a failed
        # login -- confirm against a real failed-login response.
        if 'generic-error hidden' in response:
            raise ExtractorError(
                'Unable to login, incorrect username and/or password',
                expected=True)

    def _real_initialize(self):
        """Run the login step as part of extractor initialization."""
        self._login()


class PornTrexIE(PornTrexBaseIE):
    """Extractor for single porntrex.com video pages."""

    _NETRC_MACHINE = 'porntrex'
    _VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P<id>[0-9]+)/'
    _TEST = {
        'url': 'https://www.porntrex.com/video/311136/naomi-gets-fingered-before-the-fucking',
        'info_dict': {
            'id': '311136',
            'ext': 'mp4',
            'title': 'Naomi gets fingered before the fucking',
            'uploader': 'cumberland',
            'description': 'Sexy brunette babe likes to get her tight cunt slammed in hardcore fashion.',
        }
    }

    def _real_extract(self, url):
        """Scrape the video page at `url` and return its info dict.

        The result carries the id, title, description, uploader (None when
        the page has no member link), thumbnails and formats.  Requests
        login when the page shows the private-video notice.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)
        # The notice is a literal sentence, so a substring test is enough.
        if 'Only active members can watch private videos.' in webpage:
            self.raise_login_required()

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        # Every quoted get_file URL on the page becomes one format; the
        # trailing slash is left out of the captured URL.
        media_urls = re.findall(
            r"'(https://www\.porntrex\.com/get_file/.*?)/'", webpage)
        formats = [{
            'url': media_url,
            'ext': media_url.split('.')[-1],
            'protocol': media_url.split(':')[0],
        } for media_url in media_urls]
        self._sort_formats(formats)

        # The uploader is optional metadata: a missing member link yields
        # None instead of aborting the extraction with an IndexError.
        uploader = self._search_regex(
            r'<a href="https://www\.porntrex\.com/members/[0-9]+?/">(.+?)</a>',
            webpage, 'uploader', default=None, flags=re.DOTALL)
        if uploader is not None:
            uploader = uploader.strip()

        thumbnails = [
            {'url': thumbnail_url}
            for thumbnail_url in re.findall(
                r'href="(http.*?/screenshots/\d+\.jpg/)"', webpage)]

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'uploader': uploader,
            'thumbnails': thumbnails,
            'formats': formats,
        }


class PornTrexPlayListIE(PornTrexBaseIE):
    """Extractor for porntrex.com playlist pages."""

    _NETRC_MACHINE = 'porntrex'
    _VALID_URL = \
        r'https?://(?:www\.)?porntrex\.com/playlists/(?P<id>[0-9]+)/'
    # NOTE(review): 'id' and 'title' sit next to 'info_dict' rather than
    # inside it, and 'info_dict' describes a single video -- confirm this is
    # the shape the test harness expects for a playlist test.
    _TEST = {
        'url': 'https://www.porntrex.com/playlists/31075/2016-collection/',
        'id': '31075',
        'title': 'FTVGirls 2016 Collection',
        'info_dict': {
            'id': '345462',
            'ext': 'mp4',
            'uploader': 'publicgirls',
            'title': 'FTVGirls.16.05 - Adria Part 2',
            'description': 'https://www.indexxx.com/models/121033/adria-rae/',
        }
    }

    def _real_extract(self, url):
        """Return a playlist result for the playlist page at `url`.

        Each `data-playlist-item` attribute found on the page becomes one
        URL entry; the playlist id comes from `_VALID_URL` and the title
        from the page's <title> element.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        entries = []
        for item_url in re.findall(r'data-playlist-item="(.*?)"', webpage):
            # The entry id is optional: take it from the /video/<id>/ part
            # when present instead of indexing into a split URL, which
            # raised IndexError for relative or unusually short item URLs.
            entry_id = self._search_regex(
                r'/video/([0-9]+)', item_url, 'video id', default=None)
            entries.append(self.url_result(item_url, video_id=entry_id))

        playlist_title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        # Reuse the id already matched by _VALID_URL rather than splitting
        # the URL a second time.
        return self.playlist_result(entries, playlist_id, playlist_title)
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`urlencode_postdata,`
			`ExtractorError,`
			`)`


Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00			`class PornTrexBaseIE(InfoExtractor):`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00
			`def _login(self):`
			`username, password = self._get_login_info()`
			`if username is None:`
			`return`

			`login_page = self._download_webpage(`
			`'https://www.porntrex.com/login/', None, 'Downloading login page')`

			`login_form = self._hidden_inputs(login_page)`

			`login_form.update({`
			`'username': username.encode('utf-8'),`
			`'pass': password.encode('utf-8'),`
			`'remember_me': str(1).encode('utf-8'),`
			`})`

			`login_page = self._download_webpage(`
			`'https://www.porntrex.com/ajax-login/', None,`
			`note='Logging in',`
			`data=urlencode_postdata(login_form))`

			`if re.search(r'generic-error hidden', login_page):`
			`raise ExtractorError(`
			`'Unable to login, incorrect username and/or password',`
			`expected=True)`

			`def _real_initialize(self):`
			`self._login()`

Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00
			`class PornTrexIE(PornTrexBaseIE):`
			`_NETRC_MACHINE = 'porntrex'`
			`_VALID_URL = r'https?://(?:www\.)?porntrex\.com/video/(?P<id>[0-9]+)/'`
			`_TEST = {`
Change test case for PornTrexIE The video that we used to test the previous version of porntrex.py is no longer on the website. This led to failures in the tests. Here we have a corrected version with working URL. 2019-04-30 03:13:32 +02:00			`'url': 'https://www.porntrex.com/video/311136/naomi-gets-fingered-before-the-fucking',`
Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00			`'info_dict': {`
Change test case for PornTrexIE The video that we used to test the previous version of porntrex.py is no longer on the website. This led to failures in the tests. Here we have a corrected version with working URL. 2019-04-30 03:13:32 +02:00			`'id': '311136',`
Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00			`'ext': 'mp4',`
Change test case for PornTrexIE The video that we used to test the previous version of porntrex.py is no longer on the website. This led to failures in the tests. Here we have a corrected version with working URL. 2019-04-30 03:13:32 +02:00			`'title': 'Naomi gets fingered before the fucking',`
			`'uploader': 'cumberland',`
			`'description': 'Sexy brunette babe likes to get her tight cunt slammed in hardcore fashion.',`
Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00			`}`
			`}`

Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`

Added extraction message to URL Extractor. It looks better when the extractor informs the user that it is looking at the file for data. 2019-04-20 20:52:26 +02:00			`self.report_extraction(video_id)`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`private_string = 'Only active members can watch private videos.'`
			`is_video_private_regex = re.compile(private_string)`
			`if re.findall(is_video_private_regex, webpage):`
			`self.raise_login_required()`

			`title = self._html_search_regex(`
			`r'<title>(.+?)</title>', webpage, 'title',)`
			`url2_regex = re.compile("'(https://www.porntrex.com/get_file/.*?)/'")`
			`url2 = re.findall(url2_regex, webpage)`
			`uploader_regex = re.compile(`
			`r'<a href="https://www.porntrex.com/members/[0-9]+?/">(.+?)</a>',`
			`re.DOTALL)`
			`uploader = re.findall(uploader_regex, webpage)[0].strip()`
			`thumbnails_regex = re.compile(r'href="(http.*?/screenshots/\d+.jpg/)"')`
			`thumbnails_list = re.findall(thumbnails_regex, webpage)`
			`thumbnails = []`
			`for thumbs in thumbnails_list:`
			`thumbnails.append({'url': thumbs})`
			`formats = []`
			`for x, _ in enumerate(url2):`
			`formats.append({'url': url2[x],`
			`'ext': url2[x].split('.')[-1],`
			`'protocol': url2[x].split(':')[0],`
			`})`
			`self._sort_formats(formats)`

			`return {`
			`'id': video_id,`
			`'title': title,`
			`'description': self._og_search_description(webpage),`
			`'uploader': uploader,`
			`'thumbnails': thumbnails,`
			`'formats': formats,`
			`}`


Set up Base class for shared methods. Reorganised the script adding a PornTrexBaseIE class that will be used for shared methods. Currently hosting only two methods: 1. _login 2. _real_initialize 2019-04-21 22:06:54 +02:00			`class PornTrexPlayListIE(PornTrexBaseIE):`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`_NETRC_MACHINE = 'porntrex'`
			`_VALID_URL = \`
			`r'https?://(?:www\.)?porntrex\.com/playlists/(?P<id>[0-9]+)/'`
			`_TEST = {`
Added login and password to playlist class. Added login and password to playlist class. Also added a new playlist for the test of the playlist class. I do not know how to test the credentials in youtube-dl yet so needed a class that did not need username and password and that had all its videos available for access. 2019-04-18 22:51:49 +02:00			`'url': 'https://www.porntrex.com/playlists/31075/2016-collection/',`
			`'id': '31075',`
			`'title': 'FTVGirls 2016 Collection',`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`'info_dict': {`
Added login and password to playlist class. Added login and password to playlist class. Also added a new playlist for the test of the playlist class. I do not know how to test the credentials in youtube-dl yet so needed a class that did not need username and password and that had all its videos available for access. 2019-04-18 22:51:49 +02:00			`'id': '345462',`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`'ext': 'mp4',`
Added login and password to playlist class. Added login and password to playlist class. Also added a new playlist for the test of the playlist class. I do not know how to test the credentials in youtube-dl yet so needed a class that did not need username and password and that had all its videos available for access. 2019-04-18 22:51:49 +02:00			`'uploader': 'publicgirls',`
			`'title': 'FTVGirls.16.05 - Adria Part 2',`
			`'description': 'https://www.indexxx.com/models/121033/adria-rae/',`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`}`
			`}`

			`def _real_extract(self, url):`
			`playlist_id = self._match_id(url)`
			`webpage = self._download_webpage(url, playlist_id)`

			`get_all_urls_regex = re.compile('data-playlist-item="(.*?)"')`
			`all_urls = re.findall(get_all_urls_regex, webpage)`

			`entries = []`
			`for this_url in all_urls:`
			`entries.append({'_type': 'url',`
Clean up of code before pull request. 2019-04-18 15:27:19 +02:00			`'id': this_url.split('/')[4],`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`'url': this_url,`
			`})`

			`return {`
			`'_type': 'playlist',`
Added login and password to playlist class. Added login and password to playlist class. Also added a new playlist for the test of the playlist class. I do not know how to test the credentials in youtube-dl yet so needed a class that did not need username and password and that had all its videos available for access. 2019-04-18 22:51:49 +02:00			`'id': url.split('/')[4],`
Initial commit This commit is to set up the environment correctly for updating and merging changes as they arise. 2019-04-14 14:07:25 +02:00			`'title': self._html_search_regex(`
			`r'<title>(.+?)</title>',`
			`webpage,`
			`'title',),`
			`'entries': entries,`
			`}`