Merge 6a7dee2451
into d65d89183f
This commit is contained in:
commit
10fa3761ee
|
@ -10,13 +10,13 @@ build/
|
|||
dist/
|
||||
MANIFEST
|
||||
README.txt
|
||||
youtube-dl.1
|
||||
youtube-dl.bash-completion
|
||||
youtube-dl.fish
|
||||
youtube_dl/extractor/lazy_extractors.py
|
||||
youtube-dl
|
||||
youtube-dl.exe
|
||||
youtube-dl.tar.gz
|
||||
picta-dl.1
|
||||
picta-dl.bash-completion
|
||||
picta-dl.fish
|
||||
picta_dl/extractor/lazy_extractors.py
|
||||
picta-dl
|
||||
picta-dl.exe
|
||||
picta-dl.tar.gz
|
||||
.coverage
|
||||
cover/
|
||||
updates_key.pem
|
||||
|
@ -40,7 +40,7 @@ updates_key.pem
|
|||
*.swp
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
picta-dl.zsh
|
||||
|
||||
# IntelliJ related files
|
||||
.idea
|
||||
|
@ -48,6 +48,9 @@ youtube-dl.zsh
|
|||
|
||||
tmp/
|
||||
venv/
|
||||
.env
|
||||
|
||||
*.spec
|
||||
|
||||
# VS Code related files
|
||||
.vscode
|
||||
.vscode
|
|
@ -2125,8 +2125,8 @@ class InfoExtractor(object):
|
|||
|
||||
def extract_Initialization(source):
|
||||
initialization = source.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
if initialization is not None and initialization.get('sourceURL') is not None:
|
||||
ms_info['initialization_url'] = initialization.get('sourceURL')
|
||||
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
|
@ -2134,7 +2134,9 @@ class InfoExtractor(object):
|
|||
extract_Initialization(segment_list)
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
segment_urls = [segment.get('media') for segment in segment_urls_e if segment.get('media') is not None]
|
||||
if len(segment_urls) > 0:
|
||||
ms_info['segment_urls'] = segment_urls
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
|
|
|
@ -840,6 +840,7 @@ from .picarto import (
|
|||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
)
|
||||
from .picta import PictaIE, PictaChannelPlaylistIE, PictaUserPlaylistIE
|
||||
from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
|
|
|
@ -0,0 +1,422 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from base64 import b64encode
|
||||
import re
|
||||
|
||||
from ..compat import compat_str, compat_HTTPError
|
||||
from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError
|
||||
from .common import InfoExtractor
|
||||
|
||||
ROOT_BASE_URL = "https://www.picta.cu/"
|
||||
API_BASE_URL = "https://api.picta.cu/api/v2/"
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaBaseIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_video(video, video_id=None, require_title=True):
|
||||
if len(video["results"]) == 0:
|
||||
raise ExtractorError("Cannot find video!")
|
||||
|
||||
title = (
|
||||
video["results"][0]["nombre"]
|
||||
if require_title
|
||||
else video.get("results")[0].get("nombre")
|
||||
)
|
||||
description = try_get(
|
||||
video, lambda x: x["results"][0]["descripcion"], compat_str
|
||||
)
|
||||
slug_url = try_get(
|
||||
video, lambda x: x["results"][0]["slug_url"], compat_str
|
||||
)
|
||||
uploader = try_get(
|
||||
video, lambda x: x["results"][0]["usuario"]["username"], compat_str
|
||||
)
|
||||
add_date = try_get(video, lambda x: x["results"][0]["fecha_creacion"])
|
||||
timestamp = int_or_none(unified_timestamp(add_date))
|
||||
thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"])
|
||||
manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"])
|
||||
category = try_get(
|
||||
video, lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"], compat_str
|
||||
)
|
||||
playlist_channel = (
|
||||
video["results"][0]["lista_reproduccion_canal"][0]
|
||||
if len(video["results"][0]["lista_reproduccion_canal"]) > 0
|
||||
else None
|
||||
)
|
||||
subtitle_url = try_get(video, lambda x: x["results"][0]["url_subtitulo"])
|
||||
|
||||
return {
|
||||
"id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id,
|
||||
"title": title,
|
||||
"slug_url": slug_url,
|
||||
"description": description,
|
||||
"thumbnail": thumbnail,
|
||||
"uploader": uploader,
|
||||
"timestamp": timestamp,
|
||||
"category": [category] if category else None,
|
||||
"manifest_url": manifest_url,
|
||||
"playlist_channel": playlist_channel,
|
||||
"subtitle_url": subtitle_url,
|
||||
}
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaIE(PictaBaseIE):
|
||||
IE_NAME = "picta"
|
||||
IE_DESC = "Picta videos"
|
||||
_VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|movie|embed)/(?:\?v=)?(?P<id>[\da-z-]+)" \
|
||||
r"(?:\?playlist=(?P<playlist_id>[\da-z-]+))?"
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
"url": "https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003",
|
||||
"file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm",
|
||||
"md5": "7ffdeb0043500c4bb660c04e74e90f7a",
|
||||
"info_dict": {
|
||||
"id": "818",
|
||||
"slug_url": "orishas-everyday-2019-01-16-16-36-42-443003",
|
||||
"ext": "webm",
|
||||
"title": "Orishas - Everyday",
|
||||
"thumbnail": r"re:^https?://.*imagen/img.*\.png$",
|
||||
"upload_date": "20190116",
|
||||
"description": "Orishas - Everyday (Video Oficial)",
|
||||
"uploader": "admin",
|
||||
"timestamp": 1547656602,
|
||||
},
|
||||
"params": {"format": "4"},
|
||||
},
|
||||
{
|
||||
"url": ("https://www.picta.cu/embed/"
|
||||
"palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
|
||||
"file": ("Palmiche Galeno tercer lugar en torneo virtual de "
|
||||
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
|
||||
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
|
||||
"info_dict": {
|
||||
"id": "3500",
|
||||
"slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
|
||||
"ext": "mp4",
|
||||
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
|
||||
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
|
||||
"upload_date": "20200521",
|
||||
"description": ("En esta emisión:\r\n"
|
||||
"Iniciará en La Habana nuevo método para medir el consumo "
|
||||
"eléctrico | https://bit.ly/jtlecturacee\r\n"
|
||||
"GICAcovid: nueva aplicación web para los centros de "
|
||||
"aislamiento | https://bit.ly/jtgicacovid\r\n"
|
||||
"Obtuvo Palmiche tercer lugar en la primera competencia "
|
||||
"virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n"
|
||||
"\r\n"
|
||||
"Síguenos en:\r\n"
|
||||
"Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n"
|
||||
"Twitter e Instagram: @juventudtecnica\r\n"
|
||||
"Telegram: http://t.me/juventudtecnica"),
|
||||
"uploader": "ernestoguerra21",
|
||||
"timestamp": 1590077731,
|
||||
},
|
||||
},
|
||||
{
|
||||
"url": "https://www.picta.cu/movie/phineas-ferb-pelicula-candace-universo-2020-08-28-21-00-32-857026",
|
||||
"only_matching": True,
|
||||
},
|
||||
{"url": "https://www.picta.cu/embed/?v=818", "only_matching": True},
|
||||
{
|
||||
"url": ("https://www.picta.cu/embed/"
|
||||
"palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
|
||||
"only_matching": True,
|
||||
},
|
||||
]
|
||||
|
||||
_LANGUAGES_CODES = ['es']
|
||||
_LANG_ES = _LANGUAGES_CODES[0]
|
||||
|
||||
_SUBTITLE_FORMATS = ('srt', )
|
||||
|
||||
def _real_initialize(self):
|
||||
self.playlist_id = None
|
||||
|
||||
@classmethod
|
||||
def _match_playlist_id(cls, url):
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
m = cls._VALID_URL_RE.match(url)
|
||||
assert m
|
||||
return m.group('playlist_id')
|
||||
|
||||
def _get_subtitles(self, video):
|
||||
sub_lang_list = {}
|
||||
lang = self._LANG_ES
|
||||
|
||||
sub_url = video.get('subtitle_url', '')
|
||||
|
||||
if sub_url:
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[lang] = sub_formats
|
||||
if not sub_lang_list:
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = None
|
||||
video_id = self._match_id(url)
|
||||
json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id
|
||||
video = self._download_json(json_url, video_id, "Downloading video JSON")
|
||||
info = self._extract_video(video, video_id)
|
||||
if (
|
||||
info["playlist_channel"]
|
||||
and self.playlist_id is None
|
||||
and self._match_playlist_id(url) is None
|
||||
):
|
||||
playlist_id = info["playlist_channel"].get("id")
|
||||
self.playlist_id = playlist_id
|
||||
# Download Playlist (--yes-playlist) in first place
|
||||
if (
|
||||
self.playlist_id is None
|
||||
and self._match_playlist_id(url)
|
||||
and not self._downloader.params.get('noplaylist')
|
||||
):
|
||||
playlist_id = compat_str(self._match_playlist_id(url))
|
||||
self.playlist_id = playlist_id
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||
return self.url_result(
|
||||
ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id,
|
||||
PictaUserPlaylistIE.ie_key(),
|
||||
playlist_id
|
||||
)
|
||||
elif playlist_id and not self._downloader.params.get('noplaylist'):
|
||||
playlist_id = compat_str(playlist_id)
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||
return self.url_result(
|
||||
ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id,
|
||||
PictaChannelPlaylistIE.ie_key(),
|
||||
playlist_id
|
||||
)
|
||||
elif self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
formats = []
|
||||
# MPD manifest
|
||||
if info.get("manifest_url"):
|
||||
formats.extend(
|
||||
self._extract_mpd_formats(info.get("manifest_url"), video_id)
|
||||
)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError("Cannot find video formats")
|
||||
|
||||
self._sort_formats(formats)
|
||||
info["formats"] = formats
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(info)
|
||||
info["subtitles"] = video_subtitles
|
||||
return info
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaEmbedIE(InfoExtractor):
|
||||
IE_NAME = "picta:embed"
|
||||
IE_DESC = "Picta embedded videos"
|
||||
_VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P<id>[\d]+)"
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
"url": "https://www.picta.cu/embed/?v=818",
|
||||
"file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm",
|
||||
"md5": "7ffdeb0043500c4bb660c04e74e90f7a",
|
||||
"info_dict": {
|
||||
"id": "818",
|
||||
"slug_url": "orishas-everyday-2019-01-16-16-36-42-443003",
|
||||
"ext": "webm",
|
||||
"title": "Orishas - Everyday",
|
||||
"thumbnail": r"re:^https?://.*imagen/img.*\.png$",
|
||||
"upload_date": "20190116",
|
||||
"description": "Orishas - Everyday (Video Oficial)",
|
||||
"uploader": "admin",
|
||||
"timestamp": 1547656602,
|
||||
},
|
||||
"params": {"format": "4"},
|
||||
},
|
||||
{
|
||||
"url": ("https://www.picta.cu/embed/"
|
||||
"palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
|
||||
"file": ("Palmiche Galeno tercer lugar en torneo virtual de "
|
||||
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
|
||||
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
|
||||
"info_dict": {
|
||||
"id": "3500",
|
||||
"slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
|
||||
"ext": "mp4",
|
||||
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
|
||||
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
|
||||
"upload_date": "20200521",
|
||||
"description": ("En esta emisión:\r\n"
|
||||
"Iniciará en La Habana nuevo método para medir el consumo "
|
||||
"eléctrico | https://bit.ly/jtlecturacee\r\n"
|
||||
"GICAcovid: nueva aplicación web para los centros de "
|
||||
"aislamiento | https://bit.ly/jtgicacovid\r\n"
|
||||
"Obtuvo Palmiche tercer lugar en la primera competencia "
|
||||
"virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n"
|
||||
"\r\n"
|
||||
"Síguenos en:\r\n"
|
||||
"Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n"
|
||||
"Twitter e Instagram: @juventudtecnica\r\n"
|
||||
"Telegram: http://t.me/juventudtecnica"),
|
||||
"uploader": "ernestoguerra21",
|
||||
"timestamp": 1590077731,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(url, PictaIE.ie_key())
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaPlaylistIE(InfoExtractor):
|
||||
API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/"
|
||||
IE_NAME = "picta:playlist"
|
||||
IE_DESC = "Picta playlist"
|
||||
_VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|movie|embed)/(?P<id>[\da-z-]+)" \
|
||||
r"\?playlist=(?P<playlist_id>[\da-z-]+)$"
|
||||
|
||||
_NETRC_MACHINE = "picta"
|
||||
|
||||
@classmethod
|
||||
def _match_playlist_id(cls, url):
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
m = cls._VALID_URL_RE.match(url)
|
||||
assert m
|
||||
return m.group('playlist_id')
|
||||
|
||||
def _set_auth_basic(self):
|
||||
header = {}
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return header
|
||||
|
||||
if isinstance(username, str):
|
||||
username = username.encode('latin1')
|
||||
|
||||
if isinstance(password, str):
|
||||
password = password.encode('latin1')
|
||||
|
||||
authstr = "Basic " + compat_str(b64encode(b":".join((username, password))).decode("utf-8"))
|
||||
|
||||
header["Authorization"] = authstr
|
||||
return header
|
||||
|
||||
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
|
||||
if len(playlist["results"]) == 0:
|
||||
raise ExtractorError("Cannot find playlist!")
|
||||
|
||||
title = (
|
||||
playlist["results"][0]["nombre"]
|
||||
if require_title
|
||||
else playlist.get("results")[0].get("nombre")
|
||||
)
|
||||
thumbnail = try_get(playlist, lambda x: x["results"][0].get("url_imagen"))
|
||||
entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"])
|
||||
|
||||
return {
|
||||
"id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id,
|
||||
"title": title,
|
||||
"thumbnail": thumbnail,
|
||||
"entries": entries,
|
||||
}
|
||||
|
||||
def _entries(self, playlist_id):
|
||||
json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id
|
||||
headers = self._set_auth_basic()
|
||||
playlist = {}
|
||||
try:
|
||||
playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON", headers=headers)
|
||||
assert playlist.get("count", 0) >= 1
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,):
|
||||
raise self.raise_login_required(
|
||||
msg='This playlist is only available for registered users. Check your username and password'
|
||||
)
|
||||
except AssertionError:
|
||||
raise ExtractorError("Playlist no exists!")
|
||||
|
||||
info_playlist = self._extract_playlist(playlist, playlist_id)
|
||||
playlist_entries = info_playlist.get("entries")
|
||||
|
||||
for video in playlist_entries:
|
||||
video_id = video.get("id")
|
||||
video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + playlist_id
|
||||
yield self.url_result(video_url, PictaIE.ie_key(), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_playlist_id(url)
|
||||
entries = self._entries(playlist_id)
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaChannelPlaylistIE(PictaPlaylistIE):
|
||||
IE_NAME = "picta:channel:playlist"
|
||||
IE_DESC = "Picta channel playlist"
|
||||
|
||||
_TEST_CHANNEL = {
|
||||
"url": ("https://www.picta.cu/medias/"
|
||||
"201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736"),
|
||||
"info_dict": {
|
||||
"id": 4441,
|
||||
"title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad",
|
||||
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class PictaUserPlaylistIE(PictaPlaylistIE, PictaBaseIE):
|
||||
API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion/"
|
||||
IE_NAME = "picta:user:playlist"
|
||||
IE_DESC = "Picta user playlist"
|
||||
|
||||
_TEST_USER = {
|
||||
"url": "https://www.picta.cu/medias/fundamento-big-data-2020-08-09-19-47-15-230297?playlist=129",
|
||||
"info_dict": {
|
||||
"id": 129,
|
||||
"title": "picta-dl",
|
||||
"thumbnail": None,
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
|
||||
if len(playlist["results"]) == 0:
|
||||
raise ExtractorError("Cannot find playlist!")
|
||||
|
||||
title = (
|
||||
playlist["results"][0]["nombre"]
|
||||
if require_title
|
||||
else playlist.get("results")[0].get("nombre")
|
||||
)
|
||||
thumbnail = None
|
||||
entries = try_get(playlist, lambda x: x["results"][0]["publicacion"])
|
||||
|
||||
# Playlist User need update slug_url video
|
||||
for entry in entries:
|
||||
video_id = entry.get("id")
|
||||
json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id
|
||||
video = self._download_json(json_url, video_id, "Downloading video JSON")
|
||||
info = self._extract_video(video, video_id)
|
||||
entry["slug_url"] = info.get("slug_url")
|
||||
|
||||
return {
|
||||
"id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id,
|
||||
"title": title,
|
||||
"thumbnail": thumbnail,
|
||||
"entries": entries,
|
||||
}
|
Loading…
Reference in New Issue