(?P<title>[^

([^<]+)

[\d\w]+)', url) if api_mobj is not None: url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') urlp = compat_urllib_parse_urlparse(url) if urlp.path.startswith('/play/'): request = compat_urllib_request.Request(url) response = compat_urllib_request.urlopen(request) redirecturl = response.geturl() rurlp = compat_urllib_parse_urlparse(redirecturl) file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2] url = 'http://blip.tv/a/a-' + file_id return self._real_extract(url) if '?' in url: cchar = '&' else: cchar = '?' json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = compat_urllib_request.Request(json_url) request.add_header('User-Agent', 'iTunes/10.6.1') self.report_extraction(mobj.group(1)) info = None try: urlh = compat_urllib_request.urlopen(request) if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download basename = url.split('/')[-1] title,ext = os.path.splitext(basename) title = title.decode('UTF-8') ext = ext.replace('.', '') self.report_direct_download(title) info = { 'id': title, 'url': url, 'uploader': None, 'upload_date': None, 'title': title, 'ext': ext, 'urlhandle': urlh } except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) if info is None: # Regular URL try: json_code_bytes = urlh.read() json_code = json_code_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) try: json_data = json.loads(json_code) if 'Post' in json_data: data = json_data['Post'] else: data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] umobj = re.match(self._URL_EXT, video_url) if umobj is None: raise ValueError('Can not determine filename extension') ext = umobj.group(1) info = { 'id': data['item_id'], 'url': video_url, 'uploader': data['display_name'], 'upload_date': upload_date, 'title': data['title'], 'ext': ext, 'format': data['media']['mimeType'], 'thumbnail': data['thumbnailUrl'], 'description': data['description'], 'player_url': data['embedUrl'], 'user_agent': 'iTunes/10.6.1', } except (ValueError,KeyError) as err: raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) return [info] class MyVideoIE(InfoExtractor): """Information Extractor for myvideo.de.""" _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Released into the Public Domain by Tristan Fischer on 2013-05-19 # https://github.com/rg3/youtube-dl/pull/842 def __rc4crypt(self,data, key): x = 0 box = list(range(256)) for i in list(range(256)): x = (x + box[i] + compat_ord(key[i % len(key)])) % 256 box[i], box[x] = box[x], box[i] x = 0 y = 0 out = '' for char in data: x = (x + 1) % 256 y = (y + box[x]) % 256 box[x], box[y] = box[y], box[x] out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]) return out def __md5(self,s): return hashlib.md5(s).hexdigest().encode() def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'invalid URL: %s' % url) video_id = mobj.group(1) GK = ( b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt' b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3' b'TnpsbA0KTVRkbU1tSTRNdz09' ) # Get video webpage webpage_url = 'http://www.myvideo.de/watch/%s' % video_id webpage = self._download_webpage(webpage_url, video_id) mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage) if mobj is not None: self.report_extraction(video_id) video_url = mobj.group(1) + '.flv' video_title = self._html_search_regex('([^<]+)', webpage, u'title') video_ext = self._search_regex('[.](.+?)$', video_url, u'extension') return [{ 'id': video_id, 'url': video_url, 'uploader': None, 'upload_date': None, 'title': video_title, 'ext': u'flv', }] # try encxml mobj = re.search('var flashvars={(.+?)}', webpage) if mobj is None: raise ExtractorError(u'Unable to extract video') params = {} encxml = '' sec = mobj.group(1) for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec): if not a == '_encxml': params[a] = b else: encxml = compat_urllib_parse.unquote(b) if not params.get('domain'): params['domain'] = 'www.myvideo.de' xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) if 'flash_playertype=MTV' in xmldata_url: self._downloader.report_warning(u'avoiding MTV player') xmldata_url = ( 'http://www.myvideo.de/dynamic/get_player_video_xml.php' '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes' ) % video_id # get enc data enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1] enc_data_b = binascii.unhexlify(enc_data) sk = self.__md5( base64.b64decode(base64.b64decode(GK)) + self.__md5( str(video_id).encode('utf-8') ) ) dec_data = self.__rc4crypt(enc_data_b, sk) # extracting infos self.report_extraction(video_id) video_url = None mobj = re.search('connectionurl=\'(.*?)\'', dec_data) if mobj: video_url = compat_urllib_parse.unquote(mobj.group(1)) if 'myvideo2flash' in video_url: self._downloader.report_warning(u'forcing RTMPT ...') video_url = video_url.replace('rtmpe://', 'rtmpt://') if not video_url: # extract non rtmp videos mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) if mobj is None: raise ExtractorError(u'unable to extract url') video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file') video_file = compat_urllib_parse.unquote(video_file) if not video_file.endswith('f4m'): ppath, prefix = video_file.split('.') video_playpath = '%s:%s' % (prefix, ppath) video_hls_playlist = '' else: video_playpath = '' video_hls_playlist = ( video_filepath + video_file ).replace('.f4m', '.m3u8') video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj') video_swfobj = compat_urllib_parse.unquote(video_swfobj) video_title = self._html_search_regex("(.*?)

(.*?)

\d+)': \{\s*FILENAME: \"(?P[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P[\w:/\.\?=\+-]+)\")?\s*\}," mweb = re.finditer(urlRE, webpage) namesRE = r'(?P.+?)' titles = re.finditer(namesRE, webpage) thumbsRE = r'

' thumbs = re.finditer(thumbsRE, webpage) videos = [] for vid,vtitle,thumb in zip(mweb,titles,thumbs): video_id = vid.group('videoID') title = vtitle.group('videoName') video_url = vid.group('videoURL') video_thumb = thumb.group('thumbnail') if not video_url: raise ExtractorError(u'Cannot find video url for %s' % video_id) info = { 'id':video_id, 'url':video_url, 'ext': 'flv', 'title': unescapeHTML(title), 'thumbnail': video_thumb } videos.append(info) return [self.playlist_result(videos, gameID, game_title)] class UstreamIE(InfoExtractor): _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P\d+)' IE_NAME = u'ustream' def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('videoID') video_url = u'http://tcdn.ustream.tv/video/%s' % video_id webpage = self._download_webpage(url, video_id) self.report_extraction(video_id) video_title = self._html_search_regex(r'data-title="(?P.+)"', webpage, u'title') uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', webpage, u'uploader', fatal=False, flags=re.DOTALL) thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage, u'thumbnail', fatal=False) info = { 'id': video_id, 'url': video_url, 'ext': 'flv', 'title': video_title, 'uploader': uploader, 'thumbnail': thumbnail, } return info class WorldStarHipHopIE(InfoExtractor): _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)' IE_NAME = u'WorldStarHipHop' def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') webpage_src = self._download_webpage(url, video_id) video_url = self._search_regex(r'so\.addVariable$"file","(.*?)"$', webpage_src, u'video URL') if 'mp4' in video_url: ext = 'mp4' else: ext = 'flv' video_title = self._html_search_regex(r"<title>(.*)", webpage_src, u'title') # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />', webpage_src, u'thumbnail', fatal=False) if not thumbnail: _title = r"""candytitles.*>(.*)

(?P.+?)

playlists)/(?P\d+)) # We have a playlist | ((?Ptalks)) # We have a simple talk ) (/lang/(.*?))? # The url may contain the language /(?P\w+) # Here goes the name and then ".html" ''' @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def _real_extract(self, url): m=re.match(self._VALID_URL, url, re.VERBOSE) if m.group('type_talk'): return [self._talk_info(url)] else : playlist_id=m.group('playlist_id') name=m.group('name') self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) return [self._playlist_videos_info(url,name,playlist_id)] def _playlist_videos_info(self,url,name,playlist_id=0): '''Returns the videos of the playlist''' video_RE=r'''

([^<]+)

Please enter your birth date to continue:

(.*?)

(?P.+?)

(.+?)