From 15a0eb17a27533f9036caccee1b127b0f0162d61 Mon Sep 17 00:00:00 2001
From: oleksis
Date: Fri, 1 Feb 2019 19:07:44 -0500
Subject: [PATCH] [picta] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/picta.py      | 84 ++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 youtube_dl/extractor/picta.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 2ffcffa9e..6fbf57fff 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -859,6 +859,7 @@ from .picarto import (
     PicartoIE,
     PicartoVodIE,
 )
+from .picta import PictaIE
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py
new file mode 100644
index 000000000..674a6251c
--- /dev/null
+++ b/youtube_dl/extractor/picta.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+    unified_timestamp,
+)
+
+
+class PictaIE(InfoExtractor):
+    """Extract videos published on picta.cu.
+
+    Metadata is read from the site's JSON API; formats come from the
+    MPD manifest that the API response points to.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?picta\.cu/medias/(?P<id>[0-9]+)'
+    _API_URL = 'https://www.picta.cu/api/v1/publicacion/'
+    _TEST = {
+        'url': 'https://www.picta.cu/medias/818',
+        'file': 'Orishas - Everyday-818.webm',
+        'md5': 'ebd10d5a34f23059e08419aa123aebdb',
+        'info_dict': {
+            'id': '818',
+            'ext': 'webm',
+            'title': 'Orishas - Everyday',
+            'thumbnail': r're:^https?://.*imagen/img.*\.png$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the picta.cu media page at *url*.
+
+        Raises ExtractorError when the API returns no publication for
+        the id or when the publication exposes no downloadable formats.
+        """
+        video_id = self._match_id(url)
+
+        # Equivalent to requesting
+        # https://www.picta.cu/api/v1/publicacion/?format=json&id_publicacion=818&tipo=publicacion
+        meta = self._download_json(
+            self._API_URL, video_id, query={
+                'format': 'json',
+                'id_publicacion': video_id,
+                'tipo': 'publicacion',
+            })
+
+        # The publication is the first entry of ``results``; resolve it
+        # once and fail cleanly if it is missing instead of crashing with
+        # a TypeError/IndexError on an empty or absent list.
+        publication = try_get(meta, lambda x: x['results'][0], dict)
+        if not publication:
+            raise ExtractorError(
+                'Unable to find publication %s' % video_id, expected=True)
+
+        title = publication.get('nombre')
+        if not title:
+            # Only fetch the HTML page when the API carries no title.
+            webpage = self._download_webpage(url, video_id)
+            title = self._search_regex(
+                r'<[^>]+class="post-video-title"[^>]*>([^<]+)',
+                webpage, 'title')
+
+        formats = []
+        manifest_url = publication.get('url_manifiesto')
+        if manifest_url:
+            formats.extend(self._extract_mpd_formats(manifest_url, video_id))
+        if not formats:
+            raise ExtractorError('Cannot find video formats')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': publication.get('descripcion'),
+            'uploader': publication.get('usuario'),
+            'timestamp': int_or_none(
+                unified_timestamp(publication.get('fecha_creacion'))),
+            'thumbnail': publication.get('url_imagen'),
+        }