# coding: utf-8 from __future__ import unicode_literals import re import time from ..utils import ( ExtractorError, decode_packed_codes, get_element_by_class, get_element_by_id, parse_filesize, strip_or_none, ) from .common import InfoExtractor class Mp4UploadIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mp4upload\.com/(?:embed-)?(?P[a-z\d]+)' _TESTS = [{ 'url': 'http://www.mp4upload.com/e52ycvdl4x29', 'md5': '09780a74b0de79ada5f9a8955f0704fc', 'info_dict': { 'id': 'e52ycvdl4x29', 'ext': 'mp4', 'title': '橋本潮 - ロマンティックあげるよ.mp4', 'timestamp': 1467471956, 'thumbnail': r're:^https?://.*\.jpg$', 'vcodec': 'ffh264', 'width': 454, 'height': 360, 'fps': 29.970, 'acodec': 'ffaac', 'asr': 44100, 'abr': 96, # Something adds this to the _real_extract return value, and the test runner expects it present. # Should probably be autocalculated from the timestamp instead, just like _real_extract. 'upload_date': '20160702', }, }, { 'url': 'https://www.mp4upload.com/embed-e52ycvdl4x29.html', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) page_url = 'https://www.mp4upload.com/%s' % video_id embed_url = 'https://www.mp4upload.com/embed-%s.html' % video_id webpage = self._download_webpage(page_url, video_id) if 'File not found' in webpage or 'File Not Found' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) title = strip_or_none(get_element_by_class('dfilename', webpage)) if not title: raise ExtractorError('Title not found', expected=True, video_id=video_id) info_dict = { 'title': title, 'id': video_id, } file_info = re.findall( r'>(?P