From ecf5638a44dbe39284f36574b00bb6c0d8f7a3df Mon Sep 17 00:00:00 2001 From: Gouranga Das <38029649+GourangaDas@users.noreply.github.com> Date: Wed, 19 Dec 2018 11:53:22 +0530 Subject: [PATCH] gaana.py updated --- youtube_dl/extractor/gaana.py | 250 ++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 120 deletions(-) diff --git a/youtube_dl/extractor/gaana.py b/youtube_dl/extractor/gaana.py index c5e15b97c..b691a53a6 100644 --- a/youtube_dl/extractor/gaana.py +++ b/youtube_dl/extractor/gaana.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import hashlib from ..aes import aes_cbc_decrypt from ..compat import ( compat_b64decode, @@ -17,158 +18,167 @@ from ..utils import ( class GaanaBaseIE(InfoExtractor): _BASE_URL = 'https://gaana.com' + _API_URL = 'https://apiv2.gaana.com/track/stream' + _COOKIE = '' def _Decrypt(self, data): - key = 'Z0AxbiEoZjEjci4wJCkmJQ==' - iv = 'YXNkIUAjIUAjQCExMjMxMg==' - + key = b'g@1n!(f1#r.0$)&%' + iv = b'asd!@#!@#@!12312' stream_url = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(data)), - bytes_to_intlist(compat_b64decode(key)), - bytes_to_intlist(compat_b64decode(iv)))).decode() - + bytes_to_intlist(key), + bytes_to_intlist(iv))).decode() + # unpad s = stream_url[:-ord(stream_url[len(stream_url) - 1:])] return s + def _Create_ht(self, track_id): + if not self._COOKIE: + self._COOKIE = self._get_cookies(self._BASE_URL)['PHPSESSID'].value + + mess = track_id + '|' + self._COOKIE + '|03:40:31 sec' + ht = hashlib.md5(mess.encode()).hexdigest() + self._COOKIE[3:9] + '=' + return ht + def _create_entry(self, data, video_id): raw_data = self._parse_json(data, video_id) - video_data = raw_data.get('path') title = raw_data.get('title') - if not title: - print("No title found.") - thumbnail = raw_data.get('atw', '') or raw_data.get('albumartwork', '') + thumbnail = raw_data.get('albumartwork') duration = raw_data.get('duration') + artist = raw_data.get('artist') + + def _format_artist(art): + r_sample = r'#..(\d+)#..(\w+)[^|,]*' + res = re.sub(r_sample, '', art) + return re.sub(r',', ', ', res) + + artist = _format_artist(artist) formats = [] if isinstance(video_data, dict): for value in video_data.keys(): - # need to skip auto - # this format and quaity is too dificult to handle for audio player. - # currently, it has been skipped - # in future this format also be going to available - if not value == 'auto': - content = video_data.get(value) - for k in content: - format_url = self._Decrypt(k.get('message')) + content = video_data.get(value) + for k in content: + format_url = self._Decrypt(k.get('message')) + + if value == 'auto': + format_id = 'normal' + else: format_id = value - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'ext': 'mp4' - }) + info = { + 'url': format_url, + 'format_id': format_id, + 'ext': 'mp4', + 'abr': int_or_none(k.get('bitRate')), + 'format_note': 'mp4-aac' + } - artist = raw_data.get('artist') + if format_id == 'normal': + formats.insert(0, info) + else: + formats.append(info) - # Remove unwanted # character from string - if not artist: - artist = artist.replace(artist, "###", ', ') - - return { - 'id': video_id, - 'title': title, - 'description': raw_data.get('description'), - 'duration': int_or_none(duration), - 'formats': formats, - 'album': raw_data.get('albumtitle'), - 'thumbnail': thumbnail, - 'artist': artist, - 'release_date': raw_data.get('release_date'), - 'language': raw_data.get('language') - } else: - # we are here, beacause gaana.com uses cloudfont.com also - # alongwith some other sites for storage purpose. - # that will be implemented soon. - return None + track_id = raw_data.get('track_ids') + ht = self._Create_ht(track_id) + + for g in ('normal', 'medium', 'high'): + js = self._download_json(self._API_URL, title, headers={ + 'async': '1', + 'method': 'POST'}, query={ + 'ht': ht, + 'request_type': 'web', + 'track_id': track_id, + 'quality': g + }) + format_url = js.get('stream_path') + + formats.append({ + 'url': format_url, + 'format_id': g, + 'ext': 'mp4', + 'abr': int_or_none(js.get('bit_rate')), + 'format_note': 'mp4-aac' + }) + + return { + 'id': video_id, + 'title': title, + 'duration': int_or_none(duration), + 'formats': formats, + 'album': raw_data.get('albumtitle'), + 'thumbnail': thumbnail, + 'artist': artist, + 'release_date': raw_data.get('release_date'), + 'language': raw_data.get('language') + } class GaanaIE(GaanaBaseIE): IE_NAME = 'gaana' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/song/(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?gaana\.com/(?P(song|album|artist|playlist))/(?P[^/#?]+)' _TESTS = [{ - 'url': 'https://gaana.com/song/jeeye-to-jeeye-kaise', - 'only_matching': True, + 'url': 'https://gaana.com/song/chamma-chamma-5', + 'md5': '11f926ce191e008fb82a3200ba047626', + 'info_dict': { + 'id': 'chamma-chamma-5', + 'ext': 'mp4', + 'title': 'Chamma Chamma', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/86/24725286/crop_175x175_1544754220.jpg', + 'duration': '3:16', + 'album': 'Fraud Saiyaan', + 'artist': 'Neha Kakkar, Romi, Arun, Ikka', + 'language': 'Hindi', + 'release_date': 'Dec 14, 2018' + } + }, + { + 'url': 'https://gaana.com/album/simmba', + 'info_dict': [{ + 'md5': '7f2ce83e8f6df7ed2cb50ce29d9f5d7f', + 'id': 'aankh-marey', + 'ext': 'mp4', + 'title': 'Aankh Marey', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/88/24643688/crop_175x175_1544078542.jpg', + 'duration': '3:33', + 'album': 'Simmba', + 'artist': 'Neha Kakkar, Mika Singh, Kumar Sanu', + 'language': 'Hindi', + 'release_date': 'Dec 06, 2018' + }, + { + 'md5': 'cdd78c5f3749ee0b114a26dfa454c64f', + 'id': 'tere-bin-111', + 'ext': 'mp4', + 'title': 'Tere Bin', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/85/24725285/crop_175x175_1544753630.jpg', + 'duration': '3:51', + 'album': 'Simmba', + 'artist': 'Rahat Fateh Ali Khan, Asees Kaur, Tanishk Bagchi', + 'language': 'Hindi', + 'release_date': 'Dec 14, 2018' + }] }] def _real_extract(self, url): - video_id = self._match_id(url) + r_match = re.match(self._VALID_URL, url) + video_id = r_match.group('id') + type_id = r_match.group('idtype') + self.IE_NAME += ':' + type_id + + self._set_cookie(self._BASE_URL, 'PHPSESSID', 'val') webpage = self._download_webpage(url, video_id) - raw_data = self._search_regex( - r'class="parentnode sourcelist_\d+"> (.*?) ', - webpage, 'raw data') - entry = self._create_entry(raw_data, video_id) - if entry: - return entry - -class GaanaAlbumIE(GaanaBaseIE): - IE_NAME = 'gaana:album' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/album/(?P[^/#?]+)' - _TESTS = [{ - 'url': 'https://gaana.com/album/saajan-hindi', - 'playlist_mincount': 1, - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - # print(webpage) - matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) + matchobj = re.findall(r'class="parentnode sourcelist_\d+">(.*?)', webpage) entries = [] - for g in matchobj: - entry = self._create_entry(g, playlist_id) - if entry: - entries.append(self._create_entry(g, playlist_id)) - - return self.playlist_result(entries, playlist_id) - - -class GaanaArtistIE(GaanaBaseIE): - IE_NAME = 'gaana:artist' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/artist/(?P[^/#?]+)' - _TESTS = [{ - 'url': 'https://gaana.com/artist/kumar-sanu', - 'playlist_mincount': 1, - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - urls = self._read_entry(webpage, playlist_id) - entries = [] - - for g in urls: - video_id = g.replace('https://gaana.com/song/', '') - webpage = self._download_webpage(g, video_id) - - raw_data = self._search_regex( - r'class="parentnode sourcelist_\d+"> (.*?) ', - webpage, 'raw data') - - entry = self._create_entry(raw_data, playlist_id) - - if entry: - entries.append(entry) - - return self.playlist_result(entries, playlist_id) - - def _read_entry(self, webpage, video_id): - entries = [] - matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) - - for g in matchobj: - raw_data = self._parse_json(g, video_id) - new_url = raw_data.get('share_url') - - if new_url: - new_url = self._BASE_URL + new_url - entries.append(new_url) - - return entries + if len(matchobj) > 1: + for g in matchobj: + entries.append(self._create_entry(g, video_id)) + return self.playlist_result(entries, video_id) + else: + return self._create_entry(matchobj[0], video_id)