diff --git a/.gitignore b/.gitignore index c4870a6ba..246f74d90 100644 --- a/.gitignore +++ b/.gitignore @@ -10,13 +10,13 @@ build/ dist/ MANIFEST README.txt -youtube-dl.1 -youtube-dl.bash-completion -youtube-dl.fish -youtube_dl/extractor/lazy_extractors.py -youtube-dl -youtube-dl.exe -youtube-dl.tar.gz +picta-dl.1 +picta-dl.bash-completion +picta-dl.fish +picta_dl/extractor/lazy_extractors.py +picta-dl +picta-dl.exe +picta-dl.tar.gz .coverage cover/ updates_key.pem @@ -40,7 +40,7 @@ updates_key.pem *.swp test/local_parameters.json .tox -youtube-dl.zsh +picta-dl.zsh # IntelliJ related files .idea @@ -48,6 +48,9 @@ youtube-dl.zsh tmp/ venv/ +.env + +*.spec # VS Code related files -.vscode +.vscode \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7371fdf0f..8b03aef4b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -840,7 +840,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) -from .picta import PictaIE +from .picta import PictaIE, PictaPlaylistIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 960aadad5..7dc4657f5 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -4,9 +4,12 @@ from ..compat import compat_str from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from .common import InfoExtractor +ROOT_BASE_URL = "https://www.picta.cu/" +API_BASE_URL = "https://api.picta.cu/api/v2/" + +# noinspection PyAbstractClass class PictaBaseIE(InfoExtractor): - API_BASE_URL = "https://api.picta.cu/api/v2/" @staticmethod def _extract_video(video, video_id=None, require_title=True): @@ -29,12 +32,16 @@ class PictaBaseIE(InfoExtractor): thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"]) manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"]) category = try_get( - video, lambda x: x["results"][0]["canal"]["nombre"], compat_str + video, lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"], compat_str + ) + playlist_channel = ( + video["results"][0]["lista_reproduccion_canal"][0] + if len(video["results"][0]["lista_reproduccion_canal"]) > 0 + else None ) return { - "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) - or video_id, + "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, "title": title, "description": description, "thumbnail": thumbnail, @@ -42,9 +49,11 @@ class PictaBaseIE(InfoExtractor): "timestamp": timestamp, "category": [category] if category else None, "manifest_url": manifest_url, + "playlist_channel": playlist_channel } +# noinspection PyAbstractClass class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" @@ -70,8 +79,10 @@ class PictaIE(PictaBaseIE): "params": {"format": "4"}, }, { - "url": "https://www.picta.cu/medias/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", - "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), + "file": ("Palmiche Galeno tercer lugar en torneo virtual de " + "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", @@ -79,36 +90,52 @@ class PictaIE(PictaBaseIE): "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "upload_date": "20200521", - "description": "En esta emisión:\r\n" - "Iniciará en La Habana nuevo método para medir el consumo " - "eléctrico | https://bit.ly/jtlecturacee\r\n" - "GICAcovid: nueva aplicación web para los centros de " - "aislamiento | https://bit.ly/jtgicacovid\r\n" - "Obtuvo Palmiche tercer lugar en la primera competencia " - "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" - "\r\n" - "Síguenos en:\r\n" - "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" - "Twitter e Instagram: @juventudtecnica\r\n" - "Telegram: http://t.me/juventudtecnica", + "description": ("En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica"), "uploader": "ernestoguerra21", "timestamp": 1590077731, }, }, {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, { - "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), "only_matching": True, }, ] + def _real_initialize(self): + self.playlist_id = None + def _real_extract(self, url): + playlist_id = None video_id = self._match_id(url) - json_url = ( - self.API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id - ) + json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id video = self._download_json(json_url, video_id, "Downloading video JSON") info = self._extract_video(video, video_id) + if info["playlist_channel"] and self.playlist_id is None: + playlist_id = info["playlist_channel"].get("id") + self.playlist_id = playlist_id + # Download Playlist (--yes-playlist) in first place + if playlist_id and not self._downloader.params.get('noplaylist'): + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + ROOT_BASE_URL + "playlist/" + str(playlist_id), + PictaPlaylistIE.ie_key(), + playlist_id + ) + elif self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) formats = [] # MPD manifest @@ -126,6 +153,7 @@ class PictaIE(PictaBaseIE): return info +# noinspection PyAbstractClass class PictaEmbedIE(InfoExtractor): IE_NAME = "picta:embed" IE_DESC = "Picta embedded videos" @@ -149,8 +177,10 @@ class PictaEmbedIE(InfoExtractor): "params": {"format": "4"}, }, { - "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", - "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), + "file": ("Palmiche Galeno tercer lugar en torneo virtual de " + "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", @@ -158,18 +188,18 @@ class PictaEmbedIE(InfoExtractor): "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "upload_date": "20200521", - "description": "En esta emisión:\r\n" - "Iniciará en La Habana nuevo método para medir el consumo " - "eléctrico | https://bit.ly/jtlecturacee\r\n" - "GICAcovid: nueva aplicación web para los centros de " - "aislamiento | https://bit.ly/jtgicacovid\r\n" - "Obtuvo Palmiche tercer lugar en la primera competencia " - "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" - "\r\n" - "Síguenos en:\r\n" - "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" - "Twitter e Instagram: @juventudtecnica\r\n" - "Telegram: http://t.me/juventudtecnica", + "description": ("En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica"), "uploader": "ernestoguerra21", "timestamp": 1590077731, }, @@ -178,3 +208,58 @@ class PictaEmbedIE(InfoExtractor): def _real_extract(self, url): return self.url_result(url, PictaIE.ie_key()) + + +# noinspection PyAbstractClass +class PictaPlaylistIE(InfoExtractor): + API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" + IE_NAME = "picta:playlist" + IE_DESC = "Picta playlist videos" + _VALID_URL = r"https?://www\.picta\.cu/playlist/(?P[0-9]+)" + + _TESTS = [ + { + "url": "https://www.picta.cu/playlist/4441", + "info_dict": { + "id": 4441, + "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + }, + }, + ] + + @staticmethod + def _extract_playlist(playlist, playlist_id=None, require_title=True): + if len(playlist["results"]) == 0: + raise ExtractorError("Cannot find playlist!") + + title = ( + playlist["results"][0]["nombre"] + if require_title + else playlist.get("results")[0].get("nombre") + ) + thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"]) + entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"]) + + return { + "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id, + "title": title, + "thumbnail": thumbnail, + "entries": entries, + } + + def _entries(self, playlist_id): + json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id + playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON") + info_playlist = self._extract_playlist(playlist, playlist_id) + playlist_entries = info_playlist.get("entries") + + for video in playlist_entries: + video_id = video.get("id") + video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + yield self.url_result(video_url, PictaIE.ie_key(), video_id) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + entries = self._entries(playlist_id) + return self.playlist_result(entries, playlist_id)