From 5aa598e9633feedc1cde1967315bf87fb02fd232 Mon Sep 17 00:00:00 2001
From: Lame Lemon
Date: Tue, 9 Jul 2019 11:14:48 +0300
Subject: [PATCH] [khinsider] Add new extractor

---
Review notes (this section is ignored by git-am):
 * Named regex groups were restored from the group='...' arguments that
   reference them; the collapsed copy of this patch had lost every <name>.
   Literal HTML tags that may have been part of the "Song name:" and
   "Album name:" patterns could not be recovered and should be re-checked
   against a live page.
 * The '"clickable-row"><a href="(' prefix of the album track-link pattern
   is reconstructed by analogy with the track page pattern.
 * 'playlist-count' was renamed to 'playlist_count', and the expected track
   id in the test now matches what the extractor returns.
 * The blob hash on the "index" line below was not regenerated.

 youtube_dl/extractor/extractors.py |  4 ++
 youtube_dl/extractor/khinsider.py  | 98 ++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/khinsider.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 02f17cf0d..c15f751a2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -522,6 +522,10 @@ from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .khanacademy import KhanAcademyIE
+from .khinsider import (
+    KhinsiderTrackIE,
+    KhinsiderAlbumIE,
+)
 from .kickstarter import KickStarterIE
 from .kinopoisk import KinoPoiskIE
 from .keek import KeekIE
diff --git a/youtube_dl/extractor/khinsider.py b/youtube_dl/extractor/khinsider.py
new file mode 100644
index 000000000..851c3f178
--- /dev/null
+++ b/youtube_dl/extractor/khinsider.py
@@ -0,0 +1,98 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class KhinsiderBaseIE(InfoExtractor):
+    """Scraping helpers shared by the khinsider track and album extractors."""
+
+    def _extract_track_info(self, webpage):
+        """Return (track_title, album_title, track_url) found in a track page."""
+        track_title = self._search_regex(
+            r'Song name: (?P<song_name>.+)', webpage, 'track_title',
+            group='song_name')
+        album_title = self._search_regex(
+            r'Album name: (?P<album_name>.+)', webpage, 'album_title',
+            group='album_name')
+        # "A-Z" plus an explicit "_" replaces the "A-z" range typo, and the
+        # dot before "mp3" is escaped so it only matches a literal dot.
+        track_url = self._search_regex(
+            r'href="(?P<link>[a-zA-Z0-9_\-%/.\\:]+\.mp3)"', webpage, 'url',
+            group='link')
+        return track_title, album_title, track_url
+
+    def _extract_track_entry(self, webpage):
+        """Return the info dict for the track described by a track page."""
+        track_title, album_title, track_url = self._extract_track_info(webpage)
+        return {
+            'id': track_title + album_title,
+            'url': track_url,
+            'formats': [{
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': track_url,
+            }],
+            'title': track_title,
+            'album': album_title,
+        }
+
+
+class KhinsiderTrackIE(KhinsiderBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?downloads\.khinsider\.com/game-soundtracks/album/(?P<album_name>[a-zA-Z0-9\-]+)/(?P<track_name>.+)\.mp3'
+    _TEST = {
+        'url': 'https://downloads.khinsider.com/game-soundtracks/album/fighter-s-history-arcade-gamerip/033%20%5bVoice%5d.mp3',
+        'info_dict': {
+            # The extractor builds the id as track title + album title.
+            'id': "[Voice]Fighter's History (Arcade) (gamerip)",
+            'ext': 'mp3',
+            'title': '[Voice]',
+            'album': "Fighter's History (Arcade) (gamerip)",
+        }
+    }
+
+    def _real_extract(self, url):
+        # The file name from the URL only labels the page download; the real
+        # metadata comes from the page itself.
+        track_name = re.match(self._VALID_URL, url).group('track_name')
+        webpage = self._download_webpage(url, track_name)
+        return self._extract_track_entry(webpage)
+
+
+class KhinsiderAlbumIE(KhinsiderBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?downloads\.khinsider\.com/game-soundtracks/album/(?P<name>[a-zA-Z0-9\-]+)$'
+    _TEST = {
+        'url': 'https://downloads.khinsider.com/game-soundtracks/album/r-racing-evolution',
+        'info_dict': {
+            'id': '3901',
+            'title': 'R-Racing Evolution',
+        },
+        'playlist_count': 17,
+    }
+
+    def _real_extract(self, url):
+        album_slug = re.match(self._VALID_URL, url).group('name')
+        webpage = self._download_webpage(url, album_slug)
+
+        album_id = self._search_regex(
+            r'src="/album_views\.php\?a=(?P<playlist_id>[0-9]+)"', webpage,
+            'album id', group='playlist_id')
+        song_paths = re.findall(
+            r'"clickable-row"><a href="([a-zA-Z0-9_\-%/.]+\.mp3)">', webpage)
+
+        # Keep the URL slug as the title when the album lists no tracks.
+        album_title = album_slug
+        entries = []
+        for song_path in song_paths:
+            song_url = 'https://downloads.khinsider.com%s' % song_path
+            track_name = self._search_regex(
+                r'/game-soundtracks/album/[a-zA-Z0-9\-]+/(?P<track_name>.+)\.mp3',
+                song_url, 'track name', group='track_name')
+            entry = self._extract_track_entry(
+                self._download_webpage(song_url, track_name))
+            album_title = entry['album']
+            entries.append(entry)
+
+        return self.playlist_result(entries, album_id, album_title)