From a6246c1f5b67e9bab3d6408df6ef0a91528e4680 Mon Sep 17 00:00:00 2001 From: Petar Kukolj Date: Tue, 2 Oct 2018 18:28:46 +0200 Subject: [PATCH] [videofy.me] Fixing extractor to work after site redesign --- youtube_dl/extractor/videofyme.py | 72 ++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py index cd3f50a63..288ac16b3 100644 --- a/youtube_dl/extractor/videofyme.py +++ b/youtube_dl/extractor/videofyme.py @@ -1,9 +1,16 @@ from __future__ import unicode_literals +import json + from .common import InfoExtractor from ..utils import ( int_or_none, parse_iso8601, + unescapeHTML, + sanitize_url, + clean_html, + get_element_by_attribute, + js_to_json, ) @@ -11,42 +18,55 @@ class VideofyMeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)' IE_NAME = 'videofy.me' - _TEST = { - 'url': 'http://www.videofy.me/thisisvideofyme/1100701', - 'md5': 'c77d700bdc16ae2e9f3c26019bd96143', + _TESTS = [{ + 'url': 'https://www.videofy.me/v/24582', + 'md5': '1e46140bacdae8959827903cecd054d9', 'info_dict': { - 'id': '1100701', + 'id': '24582', 'ext': 'mp4', - 'title': 'This is VideofyMe', - 'description': '', - 'upload_date': '20130326', - 'timestamp': 1364288959, - 'uploader': 'VideofyMe', - 'uploader_id': 'thisisvideofyme', + 'title': 'The VideofyMe app demo!', + 'description': 'This is VideofyMe.', + 'upload_date': '20120607', + 'timestamp': 1339070671, + 'uploader': 'oskarglauser', + 'uploader_id': 7010, 'view_count': int, - 'likes': int, - 'comment_count': int, }, - } + }, { + 'url': 'https://www.videofy.me/v/2975905', + 'md5': '79ad4498ab14dec72e815a8f85c7641c', + 'info_dict': { + 'id': '2975905', + 'ext': 'mp4', + 'title': 'But', + 'description': '', + 'upload_date': '20180126', + 'timestamp': 1516931131, + 'uploader': 'iamatlien', + 'uploader_id': 1798214, + 'view_count': int, + }, + },] def _real_extract(self, url):
video_id = self._match_id(url) - config = self._download_json('http://vf-player-info-loader.herokuapp.com/%s.json' % video_id, video_id)['videoinfo'] + page = self._download_webpage(url, video_id) - video = config.get('video') - blog = config.get('blog', {}) + video_info = json.loads(get_element_by_attribute('type', 'application/ld+json', page)) + + meta = self._download_json('https://www.videofy.me/wp-json/wp/v2/posts/%s' % video_id, video_id) + uploader_id = meta.get('author') + uploader_name = self._download_json('https://www.videofy.me/wp-json/wp/v2/users/%s' % uploader_id, uploader_id, fatal=False).get('name') return { 'id': video_id, - 'title': video['title'], - 'url': video['sources']['source']['url'], - 'thumbnail': video.get('thumb'), - 'description': video.get('description'), - 'timestamp': parse_iso8601(video.get('date')), - 'uploader': blog.get('name'), - 'uploader_id': blog.get('identifier'), - 'view_count': int_or_none(self._search_regex(r'([0-9]+)', video.get('views'), 'view count', fatal=False)), - 'likes': int_or_none(video.get('likes')), - 'comment_count': int_or_none(video.get('nrOfComments')), + 'title': video_info['name'], + 'url': video_info['contentUrl'], + 'thumbnail': video_info.get('thumbnailUrl'), + 'description': clean_html(video_info.get('description')), + 'timestamp': parse_iso8601(video_info.get('uploadDate')), + 'uploader_id': uploader_id, + 'uploader': uploader_name, + 'view_count': int_or_none(video_info.get('interactionCount')), }