1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 18:34:32 +01:00

Add PictaPlaylist extractor

This commit is contained in:
oleksis 2020-07-11 16:55:56 -04:00
parent a8b5bc2e2f
commit 1f034effd8
3 changed files with 134 additions and 46 deletions

19
.gitignore vendored
View File

@ -10,13 +10,13 @@ build/
dist/ dist/
MANIFEST MANIFEST
README.txt README.txt
youtube-dl.1 picta-dl.1
youtube-dl.bash-completion picta-dl.bash-completion
youtube-dl.fish picta-dl.fish
youtube_dl/extractor/lazy_extractors.py picta_dl/extractor/lazy_extractors.py
youtube-dl picta-dl
youtube-dl.exe picta-dl.exe
youtube-dl.tar.gz picta-dl.tar.gz
.coverage .coverage
cover/ cover/
updates_key.pem updates_key.pem
@ -40,7 +40,7 @@ updates_key.pem
*.swp *.swp
test/local_parameters.json test/local_parameters.json
.tox .tox
youtube-dl.zsh picta-dl.zsh
# IntelliJ related files # IntelliJ related files
.idea .idea
@ -48,6 +48,9 @@ youtube-dl.zsh
tmp/ tmp/
venv/ venv/
.env
*.spec
# VS Code related files # VS Code related files
.vscode .vscode

View File

@ -840,7 +840,7 @@ from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
) )
from .picta import PictaIE from .picta import PictaIE, PictaPlaylistIE
from .piksel import PikselIE from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE
from .pladform import PladformIE from .pladform import PladformIE

View File

@ -4,9 +4,12 @@ from ..compat import compat_str
from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError
from .common import InfoExtractor from .common import InfoExtractor
ROOT_BASE_URL = "https://www.picta.cu/"
API_BASE_URL = "https://api.picta.cu/api/v2/"
# noinspection PyAbstractClass
class PictaBaseIE(InfoExtractor): class PictaBaseIE(InfoExtractor):
API_BASE_URL = "https://api.picta.cu/api/v2/"
@staticmethod @staticmethod
def _extract_video(video, video_id=None, require_title=True): def _extract_video(video, video_id=None, require_title=True):
@ -29,12 +32,16 @@ class PictaBaseIE(InfoExtractor):
thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"]) thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"])
manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"]) manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"])
category = try_get( category = try_get(
video, lambda x: x["results"][0]["canal"]["nombre"], compat_str video, lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"], compat_str
)
playlist_channel = (
video["results"][0]["lista_reproduccion_canal"][0]
if len(video["results"][0]["lista_reproduccion_canal"]) > 0
else None
) )
return { return {
"id": try_get(video, lambda x: x["results"][0]["id"], compat_str) "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id,
or video_id,
"title": title, "title": title,
"description": description, "description": description,
"thumbnail": thumbnail, "thumbnail": thumbnail,
@ -42,9 +49,11 @@ class PictaBaseIE(InfoExtractor):
"timestamp": timestamp, "timestamp": timestamp,
"category": [category] if category else None, "category": [category] if category else None,
"manifest_url": manifest_url, "manifest_url": manifest_url,
"playlist_channel": playlist_channel
} }
# noinspection PyAbstractClass
class PictaIE(PictaBaseIE): class PictaIE(PictaBaseIE):
IE_NAME = "picta" IE_NAME = "picta"
IE_DESC = "Picta videos" IE_DESC = "Picta videos"
@ -70,8 +79,10 @@ class PictaIE(PictaBaseIE):
"params": {"format": "4"}, "params": {"format": "4"},
}, },
{ {
"url": "https://www.picta.cu/medias/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "url": ("https://www.picta.cu/embed/"
"file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
"file": ("Palmiche Galeno tercer lugar en torneo virtual de "
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
"info_dict": { "info_dict": {
"id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
@ -79,36 +90,52 @@ class PictaIE(PictaBaseIE):
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
"upload_date": "20200521", "upload_date": "20200521",
"description": "En esta emisión:\r\n" "description": ("En esta emisión:\r\n"
"Iniciará en La Habana nuevo método para medir el consumo " "Iniciará en La Habana nuevo método para medir el consumo "
"eléctrico | https://bit.ly/jtlecturacee\r\n" "eléctrico | https://bit.ly/jtlecturacee\r\n"
"GICAcovid: nueva aplicación web para los centros de " "GICAcovid: nueva aplicación web para los centros de "
"aislamiento | https://bit.ly/jtgicacovid\r\n" "aislamiento | https://bit.ly/jtgicacovid\r\n"
"Obtuvo Palmiche tercer lugar en la primera competencia " "Obtuvo Palmiche tercer lugar en la primera competencia "
"virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n"
"\r\n" "\r\n"
"Síguenos en:\r\n" "Síguenos en:\r\n"
"Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n"
"Twitter e Instagram: @juventudtecnica\r\n" "Twitter e Instagram: @juventudtecnica\r\n"
"Telegram: http://t.me/juventudtecnica", "Telegram: http://t.me/juventudtecnica"),
"uploader": "ernestoguerra21", "uploader": "ernestoguerra21",
"timestamp": 1590077731, "timestamp": 1590077731,
}, },
}, },
{"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True},
{ {
"url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "url": ("https://www.picta.cu/embed/"
"palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
"only_matching": True, "only_matching": True,
}, },
] ]
def _real_initialize(self):
self.playlist_id = None
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = None
video_id = self._match_id(url) video_id = self._match_id(url)
json_url = ( json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id
self.API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id
)
video = self._download_json(json_url, video_id, "Downloading video JSON") video = self._download_json(json_url, video_id, "Downloading video JSON")
info = self._extract_video(video, video_id) info = self._extract_video(video, video_id)
if info["playlist_channel"] and self.playlist_id is None:
playlist_id = info["playlist_channel"].get("id")
self.playlist_id = playlist_id
# Download Playlist (--yes-playlist) in first place
if playlist_id and not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result(
ROOT_BASE_URL + "playlist/" + str(playlist_id),
PictaPlaylistIE.ie_key(),
playlist_id
)
elif self._downloader.params.get('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
formats = [] formats = []
# MPD manifest # MPD manifest
@ -126,6 +153,7 @@ class PictaIE(PictaBaseIE):
return info return info
# noinspection PyAbstractClass
class PictaEmbedIE(InfoExtractor): class PictaEmbedIE(InfoExtractor):
IE_NAME = "picta:embed" IE_NAME = "picta:embed"
IE_DESC = "Picta embedded videos" IE_DESC = "Picta embedded videos"
@ -149,8 +177,10 @@ class PictaEmbedIE(InfoExtractor):
"params": {"format": "4"}, "params": {"format": "4"},
}, },
{ {
"url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "url": ("https://www.picta.cu/embed/"
"file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"),
"file": ("Palmiche Galeno tercer lugar en torneo virtual de "
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
"info_dict": { "info_dict": {
"id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
@ -158,18 +188,18 @@ class PictaEmbedIE(InfoExtractor):
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
"upload_date": "20200521", "upload_date": "20200521",
"description": "En esta emisión:\r\n" "description": ("En esta emisión:\r\n"
"Iniciará en La Habana nuevo método para medir el consumo " "Iniciará en La Habana nuevo método para medir el consumo "
"eléctrico | https://bit.ly/jtlecturacee\r\n" "eléctrico | https://bit.ly/jtlecturacee\r\n"
"GICAcovid: nueva aplicación web para los centros de " "GICAcovid: nueva aplicación web para los centros de "
"aislamiento | https://bit.ly/jtgicacovid\r\n" "aislamiento | https://bit.ly/jtgicacovid\r\n"
"Obtuvo Palmiche tercer lugar en la primera competencia " "Obtuvo Palmiche tercer lugar en la primera competencia "
"virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n"
"\r\n" "\r\n"
"Síguenos en:\r\n" "Síguenos en:\r\n"
"Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n"
"Twitter e Instagram: @juventudtecnica\r\n" "Twitter e Instagram: @juventudtecnica\r\n"
"Telegram: http://t.me/juventudtecnica", "Telegram: http://t.me/juventudtecnica"),
"uploader": "ernestoguerra21", "uploader": "ernestoguerra21",
"timestamp": 1590077731, "timestamp": 1590077731,
}, },
@ -178,3 +208,58 @@ class PictaEmbedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result(url, PictaIE.ie_key()) return self.url_result(url, PictaIE.ie_key())
# noinspection PyAbstractClass
class PictaPlaylistIE(InfoExtractor):
API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/"
IE_NAME = "picta:playlist"
IE_DESC = "Picta playlist videos"
_VALID_URL = r"https?://www\.picta\.cu/playlist/(?P<id>[0-9]+)"
_TESTS = [
{
"url": "https://www.picta.cu/playlist/4441",
"info_dict": {
"id": 4441,
"title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
},
},
]
@staticmethod
def _extract_playlist(playlist, playlist_id=None, require_title=True):
if len(playlist["results"]) == 0:
raise ExtractorError("Cannot find playlist!")
title = (
playlist["results"][0]["nombre"]
if require_title
else playlist.get("results")[0].get("nombre")
)
thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"])
entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"])
return {
"id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id,
"title": title,
"thumbnail": thumbnail,
"entries": entries,
}
def _entries(self, playlist_id):
json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id
playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON")
info_playlist = self._extract_playlist(playlist, playlist_id)
playlist_entries = info_playlist.get("entries")
for video in playlist_entries:
video_id = video.get("id")
video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url")
yield self.url_result(video_url, PictaIE.ie_key(), video_id)
def _real_extract(self, url):
playlist_id = self._match_id(url)
entries = self._entries(playlist_id)
return self.playlist_result(entries, playlist_id)