mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-22 16:44:32 +01:00
[JioSaavn] Add extractor
This commit is contained in:
parent
45f6362464
commit
6635b191fc
@ -495,6 +495,10 @@ from .jamendo import (
|
||||
JamendoAlbumIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jiosaavn import (
|
||||
JioSaavnSongIE,
|
||||
JioSaavnAlbumIE,
|
||||
)
|
||||
from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
|
118
youtube_dl/extractor/jiosaavn.py
Normal file
118
youtube_dl/extractor/jiosaavn.py
Normal file
@ -0,0 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class JioSaavnBaseInfoExtractor(InfoExtractor):
|
||||
def extract_initial_data_as_json(self, url, id):
|
||||
webpage = self._download_webpage(url, id)
|
||||
init_data_re = r'window.__INITIAL_DATA__\s*=\s*(?P<json>.+?);*\s*\</script>'
|
||||
|
||||
def sanitize_init_data(init_data):
|
||||
new_date_call_re = r'new Date\(.+?\)'
|
||||
return re.sub(new_date_call_re, '"%s"' % time.ctime(), unescapeHTML(init_data))
|
||||
|
||||
init_data = self._parse_json(self._search_regex(init_data_re, webpage,
|
||||
'init-json'),
|
||||
id, transform_source=sanitize_init_data)
|
||||
return init_data
|
||||
|
||||
|
||||
class JioSaavnSongIE(JioSaavnBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
(?:
|
||||
jiosaavn\.com/song/[^/]+/|
|
||||
saavn.com/s/song/(?:[^/]+/){3}
|
||||
)
|
||||
(?P<id>[\w\d]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
|
||||
'md5': '7b1f70de088ede3a152ea34aece4df42',
|
||||
'info_dict': {
|
||||
'id': 'OQsEfQFVUXk',
|
||||
'ext': 'mp3',
|
||||
'title': 'Leja Re',
|
||||
'album': 'Leja Re',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
fp = ''.join([random.choice(string.hexdigits) for _ in range(32)])
|
||||
|
||||
# This is required, else server won't return the media_url (extracted below)
|
||||
self._download_webpage('https://www.jiosaavn.com/stats.php', audio_id,
|
||||
query={
|
||||
'ev': 'site:browser:fp',
|
||||
'fp': fp,
|
||||
'_t': int(time.time()),
|
||||
'ct': '00000000',
|
||||
})
|
||||
|
||||
song_data = self.extract_initial_data_as_json(url, audio_id)['song']['song']
|
||||
|
||||
data = urlencode_postdata({'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': '128',
|
||||
'url': song_data['encrypted_media_url'],
|
||||
})
|
||||
|
||||
def clean_api_json(resp):
|
||||
return self._search_regex(r'(?P<json>\{.+?})', resp, 'api-json')
|
||||
|
||||
media_url = self._download_json('https://www.jiosaavn.com/api.php',
|
||||
audio_id, data=data,
|
||||
transform_source=clean_api_json,
|
||||
)['auth_url']
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': song_data['title']['text'],
|
||||
'formats': [{
|
||||
'url': media_url,
|
||||
'ext': 'mp3',
|
||||
'format_note': 'MPEG audio',
|
||||
'format_id': '128',
|
||||
'vcodec': 'none',
|
||||
}],
|
||||
'album': song_data.get('album', {}).get('text'),
|
||||
'thumbnail': song_data.get('image', [None])[0],
|
||||
}
|
||||
|
||||
|
||||
class JioSaavnAlbumIE(JioSaavnBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
(?:(?:jio)?saavn\.com/album/[^/]+/)
|
||||
(?P<id>[\w\d]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
|
||||
'info_dict': {
|
||||
'id': 'buIOjYZDrNA_',
|
||||
'title': '96',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
album_data = self.extract_initial_data_as_json(url, album_id)['albumView']['album']
|
||||
songs = [self.url_result(song['perma_url']) for song in album_data['songs']]
|
||||
|
||||
return self.playlist_result(songs, album_id, album_data.get('title', {}).get('text'))
|
Loading…
Reference in New Issue
Block a user