diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c6f8a785a..9253e0330 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -631,6 +631,7 @@ from .mixcloud import ( ) from .mlb import MLBIE from .mnet import MnetIE +from .mp4upload import Mp4UploadIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE diff --git a/youtube_dl/extractor/mp4upload.py b/youtube_dl/extractor/mp4upload.py new file mode 100644 index 000000000..3a8fde02a --- /dev/null +++ b/youtube_dl/extractor/mp4upload.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import time + +from ..utils import ( + ExtractorError, + decode_packed_codes, + get_element_by_class, + get_element_by_id, + js_to_json, + parse_filesize, + strip_or_none, +) +from .common import InfoExtractor + + +class Mp4UploadIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mp4upload\.com/(?:embed-)?(?P[a-z\d]+)' + _TESTS = [{ + 'url': 'http://www.mp4upload.com/e52ycvdl4x29', + 'md5': '09780a74b0de79ada5f9a8955f0704fc', + + 'info_dict': { + 'id': 'e52ycvdl4x29', + 'ext': 'mp4', + 'title': '橋本潮 - ロマンティックあげるよ.mp4', + 'timestamp': 1467471956, + 'thumbnail': r're:^https?://.*\.jpg$', + + 'vcodec': 'ffh264', + 'width': 454, + 'height': 360, + 'fps': 29.970, + + 'acodec': 'ffaac', + 'asr': 44100, + 'abr': 96, + + # Something adds this to the _real_extract return value, and the test runner expects it present. + # Should probably be autocalculated from the timestamp instead, just like _real_extract. + 'upload_date': '20160702', + }, + }, { + 'url': 'https://www.mp4upload.com/embed-e52ycvdl4x29.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + page_url = 'https://www.mp4upload.com/%s' % video_id + embed_url = 'https://www.mp4upload.com/embed-%s.html' % video_id + + webpage = self._download_webpage(page_url, video_id) + if 'File not found' in webpage or 'File Not Found' in webpage: + raise ExtractorError('File not found', expected=True, video_id=video_id) + + title = strip_or_none(get_element_by_class('dfilename', webpage)) + if not title: + raise ExtractorError('Title not found', expected=True, video_id=video_id) + + info_dict = { + 'title': title, + 'id': video_id, + } + + file_info = re.findall( + r'">(?P