Merge branch 'cinemassacre' of github.com:rzhxeo/youtube-dl into rzhxeo-cinemassacre

2024-11-22 16:44:32 +01:00 · 2013-10-11 19:53:27 +02:00 · 2013-10-11 19:53:27 +02:00 · d2f9cdb205
commit d2f9cdb205
parent 3823342d9d ca215e0a4f
2 changed files with 116 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -12,6 +12,7 @@ from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cinemassacre import CinemassacreIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@ -0,0 +1,115 @@
 # encoding: utf-8
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )
 class CinemassacreIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
    _TESTS = [{
        u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
        u'file': u'19911.mp4',
        u'info_dict': {
            u'upload_date': u'20121110', 
            u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
            u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
        },
        u'params': {
            u'skip_download': True,
        },
    },
    {
        u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
        u'file': u'521be8ef82b16.mp4',
        u'info_dict': {
            u'upload_date': u'20131002', 
            u'title': u'The Mummy’s Hand (1940)',
        },
        u'params': {
            u'skip_download': True,
        },
    }]
    def _real_extract(self,url):
        mobj = re.match(self._VALID_URL, url)
        webpage_url = u'http://' + mobj.group('url')
        webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
        if not mobj:
            raise ExtractorError(u'Can\'t extract embed url and video id')
        playerdata_url = mobj.group(u'embed_url')
        video_id = mobj.group(u'video_id')
        video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
            webpage, u'title')
        video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, u'description', flags=re.DOTALL, fatal=False)
        if len(video_description) == 0:
            video_description = None
        playerdata = self._download_webpage(playerdata_url, video_id)
        base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})/(?:vod|Cinemassacre)\'',
            playerdata, u'base_url')
        base_url += '/Cinemassacre/'
         # Important: The file names in playerdata are not used by the player and even wrong for some videos
        sd_file = 'Cinemassacre-%s_high.mp4' % video_id
        hd_file = 'Cinemassacre-%s.mp4' % video_id
        video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
        formats = [{
            'id':          video_id,
            'url':         base_url + hd_file,
            'format':      'hd',
            'ext':         'mp4',
            'title':       video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail':   video_thumbnail,
        },
        {
            'id':          video_id,
            'url':         base_url + sd_file,
            'ext':         'mp4',
            'format':      'sd',
            'title':       video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail':   video_thumbnail,
        }]
        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return
        req_format = self._downloader.params.get('format', 'best')
        self.to_screen(u'Format: %s' % req_format)
        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            format = self._specific( req_format, formats )
            if format is None:
                raise ExtractorError(u'Requested format not available')
            return [format]
    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for format in formats:
            print(u'%s\t\t%s'  % (format['ext'], format['format']))
    def _specific(self, req_format, formats):
        for x in formats:
            if x["format"] == req_format:
                return x
        return None