# youtube-dl/youtube_dl/extractor/addanime.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    ExtractorError,
    qualities,
)


class AddAnimeIE(InfoExtractor):
    """Extractor for add-anime.net video pages.

    Handles both ``watch_video.php?v=<id>`` and ``video/<id>/...`` URLs.
    The watch page is scanned for the ``normal_video_file`` and
    ``hq_video_file`` JavaScript variables, each of which becomes one format.
    """

    _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        'md5': '72954ea10bc979ab5e2eb288b21425a0',
        'info_dict': {
            'id': '24MR3YO5SAS9',
            'ext': 'mp4',
            'description': 'One Piece 606',
            'title': 'One Piece 606',
        }
    }, {
        'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
        'only_matching': True,
    }]

    def _pass_challenge(self, url, video_id, challenge_page):
        """Submit the answer to the challenge form served with an HTTP 503.

        The challenge page contains a form (``id="challenge-form"``), a hidden
        ``jschl_vc`` token and a small arithmetic task of the shape
        ``a.value = X+Y*Z;``.  The submitted answer is the result of that
        task plus the length of the network location of ``url``.

        :param url: the originally requested video page URL
        :param video_id: video id, passed on to the download helper
        :param challenge_page: decoded body of the 503 response
        :raises ExtractorError: if the form, the token or the arithmetic
            task cannot be found in ``challenge_page``
        """
        action = self._search_regex(
            r'<form id="challenge-form" action="([^"]+)"',
            challenge_page, 'Redirect form')
        vc = self._search_regex(
            r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
            challenge_page, 'redirect vc value')
        task = re.search(
            r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
            challenge_page)
        if task is None:
            raise ExtractorError('Cannot find redirect math task')
        first, second, third = (int(number) for number in task.groups())

        parsed_url = compat_urllib_parse_urlparse(url)
        # Usual operator precedence applies: the product is evaluated first.
        answer = first + second * third + len(parsed_url.netloc)
        query = compat_urllib_parse_urlencode({
            'jschl_vc': vc,
            'jschl_answer': compat_str(answer),
        })
        # The form action is appended to the scheme and host of the request.
        confirm_url = '%s://%s%s?%s' % (
            parsed_url.scheme, parsed_url.netloc, action, query)
        self._download_webpage(
            confirm_url, video_id,
            note='Confirming after redirect')

    def _real_extract(self, url):
        """Return the info dict (id, formats, title, description) for ``url``.

        If the first request fails with an HTTP 503, the body of that
        response is treated as a challenge page: the challenge is answered
        and the video page is requested a second time.  Any other download
        error is re-raised unchanged.
        """
        video_id = self._match_id(url)

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            cause = ee.cause
            # Only an HTTP 503 is handled here; everything else propagates.
            if not isinstance(cause, compat_HTTPError) or cause.code != 503:
                raise

            self._pass_challenge(
                url, video_id, cause.read().decode('utf-8'))
            webpage = self._download_webpage(url, video_id)

        # Listed in the order given to qualities(); the position in this
        # tuple is what ranks one format against the other.
        FORMATS = ('normal', 'hq')
        quality = qualities(FORMATS)
        formats = []
        for format_id in FORMATS:
            rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
            # Non-fatal: a page may provide only one of the two variants.
            video_url = self._search_regex(
                rex, webpage, 'video file URL', fatal=False)
            if not video_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': video_url,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`from __future__ import unicode_literals`

[addanime] add file 2013-08-28 10:28:16 +02:00			`import re`

			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 11:23:40 +01:00			`from ..compat import (`
[addanime] add file 2013-08-28 10:28:16 +02:00			`compat_HTTPError,`
			`compat_str,`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 2016-03-25 20:46:57 +01:00			`compat_urllib_parse_urlencode,`
[addanime] add file 2013-08-28 10:28:16 +02:00			`compat_urllib_parse_urlparse,`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 11:23:40 +01:00			`)`
			`from ..utils import (`
[addanime] add file 2013-08-28 10:28:16 +02:00			`ExtractorError,`
[addanime] Add format quality (Closes #5371) 2015-04-07 17:00:22 +02:00			`qualities,`
[addanime] add file 2013-08-28 10:28:16 +02:00			`)`


			`class AddAnimeIE(InfoExtractor):`
Add support for https for all extractors as preventive and future-proof measure 2016-03-21 16:36:32 +01:00			`_VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=\|video/)(?P<id>[\w_]+)'`
[addanime] Add test for #5372 2015-04-07 17:01:35 +02:00			`_TESTS = [{`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',`
			`'md5': '72954ea10bc979ab5e2eb288b21425a0',`
			`'info_dict': {`
			`'id': '24MR3YO5SAS9',`
			`'ext': 'mp4',`
			`'description': 'One Piece 606',`
			`'title': 'One Piece 606',`
[addanime] add file 2013-08-28 10:28:16 +02:00			`}`
[addanime] Add test for #5372 2015-04-07 17:01:35 +02:00			`}, {`
			`'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',`
			`'only_matching': True,`
			`}]`
[addanime] add file 2013-08-28 10:28:16 +02:00
			`def _real_extract(self, url):`
[addanime] Modernize 2014-11-26 12:40:05 +01:00			`video_id = self._match_id(url)`

[addanime] add file 2013-08-28 10:28:16 +02:00			`try:`
			`webpage = self._download_webpage(url, video_id)`
			`except ExtractorError as ee:`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`if not isinstance(ee.cause, compat_HTTPError) or \`
			`ee.cause.code != 503:`
[addanime] add file 2013-08-28 10:28:16 +02:00			`raise`

			`redir_webpage = ee.cause.read().decode('utf-8')`
			`action = self._search_regex(`
			`r'<form id="challenge-form" action="([^"]+)"',`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`redir_webpage, 'Redirect form')`
[addanime] add file 2013-08-28 10:28:16 +02:00			`vc = self._search_regex(`
			`r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`redir_webpage, 'redirect vc value')`
[addanime] add file 2013-08-28 10:28:16 +02:00			`av = re.search(`
			`r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',`
			`redir_webpage)`
			`if av is None:`
[addanime] Modernize 2014-11-26 12:40:05 +01:00			`raise ExtractorError('Cannot find redirect math task')`
[addanime] add file 2013-08-28 10:28:16 +02:00			`av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))`

			`parsed_url = compat_urllib_parse_urlparse(url)`
			`av_val = av_res + len(parsed_url.netloc)`
			`confirm_url = (`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`parsed_url.scheme + '://' + parsed_url.netloc +`
[addanime] add file 2013-08-28 10:28:16 +02:00			`action + '?' +`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 2016-03-25 20:46:57 +01:00			`compat_urllib_parse_urlencode({`
[addanime] add file 2013-08-28 10:28:16 +02:00			`'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))`
			`self._download_webpage(`
			`confirm_url, video_id,`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`note='Confirming after redirect')`
[addanime] add file 2013-08-28 10:28:16 +02:00			`webpage = self._download_webpage(url, video_id)`

[addanime] Add format quality (Closes #5371) 2015-04-07 17:00:22 +02:00			`FORMATS = ('normal', 'hq')`
			`quality = qualities(FORMATS)`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`formats = []`
[addanime] Add format quality (Closes #5371) 2015-04-07 17:00:22 +02:00			`for format_id in FORMATS:`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`video_url = self._search_regex(rex, webpage, 'video file URLx',`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`fatal=False)`
			`if not video_url:`
			`continue`
			`formats.append({`
			`'format_id': format_id,`
			`'url': video_url,`
[addanime] Add format quality (Closes #5371) 2015-04-07 17:00:22 +02:00			`'quality': quality(format_id),`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`})`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`self._sort_formats(formats)`
[addanime] add file 2013-08-28 10:28:16 +02:00			`video_title = self._og_search_title(webpage)`
			`video_description = self._og_search_description(webpage)`

			`return {`
			`'_type': 'video',`
[addanime] Modernize 2014-03-24 16:39:53 +01:00			`'id': video_id,`
[addanime] Use new formats system 2013-10-28 11:24:47 +01:00			`'formats': formats,`
[addanime] add file 2013-08-28 10:28:16 +02:00			`'title': video_title,`
			`'description': video_description`
			`}`