This commit is contained in:
Ha Tien Loi 2020-09-27 19:36:35 +00:00 committed by GitHub
commit 280c576088
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1111 additions and 1 deletions

View File

@ -0,0 +1,137 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
try_get,
mimetype2ext,
clean_html
)
class AxiosIE(InfoExtractor):
IE_NAME = 'axios'
IE_DESC = 'www.axios.com'
_VALID_URL = r"""(?x)^
((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(www\.axios\.com)
)\/
(?P<slug>.*?)\-
(?P<id>[0-9a-z]{8}\-[0-9a-z]{4}-[a-z0-9]{4}.+?)\.
"""
__TESTS = [
{
"url": r"https://www.axios.com/trump-coronavirus-restrictions-c3da2d28-b761-4b62-b6d6-734c059c6dba.html",
"info_dict": {
"id": "c3da2d28-b761-4b62-b6d6-734c059c6dba",
"title": '''Trump says he wants to "open" the country by Easter''',
"ext": "mp4",
"description": str,
'thumbnails': [],
}
},
{
"url": r"https://www.axios.com/coronavirus-texas-official-grandparents-die-172ca951-891c-44e7-a9ec-77c486e0c5c3.html",
"info_dict": {
"id": "172ca951-891c-44e7-a9ec-77c486e0c5c3",
"title": '''Texas Lt. Gov.: Grandparents would be willing to die to save the economy''',
"ext": "mp4",
"description": str,
'thumbnails': [],
}
},
{
"url": r"https://www.axios.com/cuomo-trump-mandatory-quarantine-panic-35ae54a1-0aa9-4a38-910d-647293002fc2.html",
"info_dict": {
"id": "35ae54a1-0aa9-4a38-910d-647293002fc2",
"title": '''Cuomo: Trump's mandatory quarantine comments "really panicked people"''',
"ext": "mp4",
"description": str,
'thumbnails': [],
}
},
{
"url": r"https://www.axios.com/coronavirus-louisiana-bel-edwards-ventilators-7810fc76-1825-41b2-8b22-f1cfc14e2ffe.html",
"info_dict": {
"id": "7810fc76-1825-41b2-8b22-f1cfc14e2ffe",
"title": '''Louisiana on track to exceed ventilator capacity this week, governor says''',
"ext": "mp4",
"description": str,
'thumbnails': [],
}
},
]
api_jwplayer = r'http://content.jwplatform.com/v2/media/%s'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url_or_request=url,
video_id=video_id
)
jwplayer_mobj = re.search(
r'''<amp-jwplayer.+?data-player-id=\"(?P<player_id>.+?)\".+?data-media-id=\"(?P<media_id>.+?)\".+?\>\<\/amp-jwplayer>''',
webpage
)
description = self._search_regex(
r'''<div\s+class=\"b0w77w-0 jctzOA gtm-story-text\"\>(?P<description>.*?)\<\/div\>''',
webpage, "Description", group="description"
)
title = self._search_regex(
r'''<h1\s+class="sc-31t5q3-0 sc-1fjk95c-2 guveJc"\>(?P<title>.*?)\<\/h1\>''',
webpage, "Title", group="title"
)
description = clean_html(description)
# player_id = jwplayer_mobj.group("player_id")
media_id = jwplayer_mobj.group("media_id")
json_jwplayer = self._download_json(
url_or_request=self.api_jwplayer % media_id,
video_id=media_id,
)
playlist = try_get(json_jwplayer, lambda x: x['playlist'][0])
if playlist:
images = playlist.get('images')
thumbnails = [
{
"url": img.get('src'),
"width": img.get('width')
} for img in images if img.get('src')
]
sources = playlist.get('sources') or []
formats = []
for sour in sources:
if not sour:
continue
_type = sour.get('type')
ext = mimetype2ext(_type)
file = sour.get('file')
if ext == 'm3u8':
m3u8_doc = self._download_webpage(file, video_id=media_id)
formats.extend(self._parse_m3u8_formats(m3u8_doc, file))
elif ext == 'mp4':
formats.append({
"url": file,
"ext": ext,
"height": sour.get('height'),
"width": sour.get('width'),
'protocol': 'http',
"label": sour.get("label")
})
else:
formats.append({
"url": file,
"ext": ext,
'protocol': 'http',
"label": sour.get("label")
})
formats.sort(key=lambda x: x.get("height") if x.get("height") else -1)
return {
"id": video_id,
"title": title.strip(),
"thumbnails": thumbnails,
"formats": formats,
"description": description
}

View File

@ -1515,3 +1515,19 @@ from .zattoo import (
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE
from .zype import ZypeIE
from .axios import (
AxiosIE
)
from .likee import (
LikeeIE,
LikeeUserIE
)
from .zingmp3_vn import (
Zingmp3_vnIE,
Zingmp3_vnPlaylistIE,
Zingmp3_vnChartIE,
Zingmp3_vnUserIE,
)

View File

@ -0,0 +1,203 @@
# coding: utf-8
# Code by hatienl0i261299 - fb.com/100011734236090 - hatienloi261299@gmail.com
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode
)
from ..utils import (
js_to_json,
int_or_none,
try_get
)
class LikeeIE(InfoExtractor):
_VALID_URL = r'''(?x)^((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(likee\.com)
)\/(?P<user>@.+?)\/(video)\/(?P<id>[0-9]+)$
'''
IE_NAME = 'likee'
IE_DESC = 'likee.com'
_TESTS = [
{
"url": "https://likee.com/@Inayat95/video/6808497581927578387",
"info_dict": {
"id": "6808497581927578387",
"ext": "mp4",
"title": "@Inayat95_6808497581927578387",
"description": str,
"thumbnail": r"re:^https?:.+?.jpg",
"uploader": str,
"uploader_id": int,
"like_count": int,
"comment_count": int,
"share_count": int,
"view_count": int,
"download_count": int
}
},
{
"url": "https://likee.com/@Inayat95/video/6792552721999608595",
"info_dict": {
"id": "6792552721999608595",
"ext": "mp4",
"title": "@Inayat95_6792552721999608595",
"description": str,
"thumbnail": r"re:^https?:.+?.jpg",
"uploader": str,
"uploader_id": int,
"like_count": int,
"comment_count": int,
"share_count": int,
"view_count": int,
"download_count": int
}
},
{
"url": "https://likee.com/@435421183/video/6802046076516688592",
"info_dict": {
"id": "6802046076516688592",
"ext": "mp4",
"title": "@435421183_6802046076516688592",
"description": str,
"thumbnail": r"re:^https?:.+?.jpg",
"uploader": str,
"uploader_id": int,
"like_count": int,
"comment_count": int,
"share_count": int,
"view_count": int,
"download_count": int
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group("id")
user = mobj.group("user")
webpage = self._download_webpage(
url_or_request=url,
video_id=video_id
)
info_video = self._regex_data(webpage, video_id)
if info_video.get("post_id") == video_id:
formats = [{
"url": info_video.get("video_url") or info_video.get("video_water_url"),
"ext": "mp4",
"height": int_or_none(info_video.get("video_height")),
"width": int_or_none(info_video.get("video_width")),
"protocol": "http",
}]
def get_count(name):
return int_or_none(info_video.get(name), default=0)
return {
"id": video_id,
"title": "%s_%s" % (user, video_id),
"description": info_video.get("msg_text") or '',
"thumbnail": info_video.get("image1") or info_video.get("image2") or info_video.get("image3"),
"like_count": get_count("like_count"),
"view_count": get_count("play_count"),
"share_count": get_count("share_count"),
"download_count": get_count("download_count"),
"comment_count": get_count("comment_count"),
"uploader": info_video.get("nick_name"),
"uploader_id": int_or_none(info_video.get("poster_uid")),
"formats": formats
}
def _regex_data(self, webpage, video_id):
info_video = self._parse_json(self._search_regex(
r'''<script>window.data\s+=\s+(\{.+?\})\;''',
webpage,
"info video",
), video_id, transform_source=js_to_json)
return info_video
class LikeeUserIE(LikeeIE):
_VALID_URL = r'''(?x)^((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(likee\.com)
)\/(user)\/(?P<user>@.*?)(\W|$)
'''
IE_NAME = "likee:user"
_TESTS = [
{
"url": "https://likee.com/user/@Inayat95",
"info_dict": {
"id": "1357265683",
"title": "@Inayat95",
},
"playlist_mincount": 10
},
{
"url": "https://likee.com/user/@435421183/",
"info_dict": {
"id": "681435856",
"title": "@435421183",
},
"playlist_mincount": 5
},
{
"url": "https://likee.com/user/@52710468/",
"info_dict": {
"id": "1300330468",
"title": "@52710468",
},
"playlist_mincount": 10
}
]
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
user = mobj.group("user")
webpage = self._download_webpage(
url_or_request=url,
video_id=user
)
info_playlist = self._regex_data(webpage, user)
uid = try_get(info_playlist, lambda x: x['userinfo']['uid'])
return self.playlist_result(entries=self._entries(uid, user), playlist_id=uid, playlist_title=user)
def _entries(self, uid, user):
count = 50
lastPostId = ""
while True:
info = self._download_json(
url_or_request="https://likee.com/official_website/VideoApi/getUserVideo",
video_id=lastPostId or uid,
data=compat_urllib_parse_urlencode({
"uid": uid,
"count": count,
"lastPostId": lastPostId
}).encode()
)
if info.get("msg") != "success":
break
videoList = try_get(info, lambda x: x['data']['videoList'])
video_id = ''
for video in videoList:
if not video:
continue
video_id = video.get("postId")
yield self.url_result(
url="https://likee.com/%s/video/%s" % (user, video_id),
ie=LikeeIE.ie_key(),
video_id=video_id
)
lastPostId = video_id
if len(videoList) != count:
break

View File

@ -219,7 +219,6 @@ class LyndaIE(LyndaBaseIE):
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id)
return {
'id': video_id,
'title': title,

View File

@ -0,0 +1,755 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import hashlib
import hmac
import re
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urllib_parse
)
from ..utils import (
url_or_none,
int_or_none,
try_get,
js_to_json
)
class Zingmp3_vnIE(InfoExtractor):
_VALID_URL = r'''(?x)^
((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(zingmp3\.vn)
)\/(?P<type>bai-hat|video-clip|embed)\/(?P<slug>.*?)\/(?P<id>.*?)\W
'''
IE_NAME = 'zingmp3_vn'
IE_DESC = 'zingmp3.vn'
_TESTS = [{
'url': 'https://zingmp3.vn/bai-hat/Khoc-Cung-Em-Mr-Siro-Gray-Wind/ZWBI0DFI.html',
'info_dict': {
'id': 'ZWBI0DFI',
'ext': 'mp3',
'title': 'Khóc Cùng Em',
'thumbnail': r're:^https?://.*\.jpg$',
'description': str,
'like_count': int,
'comment_count': int,
'view_count': int,
'duration': 236,
},
}, {
'url': "https://zingmp3.vn/video-clip/Em-Gi-Oi-Jack-K-ICM/ZWAEFWIF.html",
'info_dict': {
'id': 'ZWAEFWIF',
'ext': 'mp4',
'title': "Em Gì Ơi",
'thumbnail': r're:^https?://.*\.jpg$',
'description': str,
'like_count': int,
'comment_count': int,
'view_count': int,
}
}, {
"url": 'https://zingmp3.vn/video-clip/Simple-Love-Obito-Seachains-Davis/ZWAFIZAZ.html',
'info_dict': {
'id': 'ZWAFIZAZ',
'ext': 'mp4',
'title': 'Simple Love',
'thumbnail': r're:^https?://.*\.jpg$',
'description': str,
'like_count': int,
'comment_count': int,
'view_count': int
}
}, {
"url": 'https://zingmp3.vn/bai-hat/Marry-You-Bruno-Mars/ZWZE8I76.html',
'info_dict': {
'id': 'ZWZE8I76',
'ext': 'mp3',
'title': "Marry You",
'thumbnail': r're:^https?://.*\.jpg$',
'description': str,
'like_count': int,
'comment_count': int,
'view_count': int,
}
}, {
'url': 'https://zingmp3.vn/bai-hat/Dap-An-Cua-Ban-A-Nhung/ZWB0OWIZ.html',
'info_dict': {
'id': 'ZWB0OWIZ',
'ext': 'mp3',
'title': "Đáp Án Của Bạn / 你的答案",
'thumbnail': r're:^https?://.*\.jpg$',
'description': str,
'like_count': int,
'comment_count': int,
'view_count': int,
}
}, {
'url': 'https://zingmp3.vn/embed/song/ZWB06FEA?start=false',
'info_dict': {
'id': 'ZWB06FEA',
'ext': 'mp3',
'title': "Life Is Good",
'thumbnail': r're:^https?://.*\.jpg$',
'like_count': int,
'comment_count': int,
'view_count': int,
}
}, {
# This url can not listen if don't have account vip, use --cookies FILE to login
'url': 'https://zingmp3.vn/bai-hat/The-Box-Roddy-Ricch/ZWB0ZF9Z.html',
"only_matching": True
}]
_default_host = "https://zingmp3.vn/"
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
video_id = mobj.group('id')
_type = mobj.group('type')
slug = mobj.group('slug')
return self.extract_info_media(_type, slug, video_id)
def extract_info_media(self, _type, slug, video_id):
formats = []
name_api = ''
if _type == 'bai-hat':
name_api = '/song/get-song-info'
elif _type == 'embed':
if slug and slug == 'song':
name_api = '/song/get-song-info'
elif _type == 'video-clip':
name_api = "/video/get-video-detail"
api = self.get_api_with_signature(name_api=name_api, video_id=video_id)
info = self._download_json(url_or_request=api, video_id=video_id)
if _type == 'video-clip' and not self._downloader.params.get("cookiefile"):
# TODO: Have api can get best quality like 1080p, 720p, default if dont have VIP just 480p is best quality.
# If requests are continuous without downtime,
# you may be blocked IP for a short period of time,
# So => time.sleep(2)
_api_video = """http://api.mp3.zing.vn/api/mobile/video/getvideoinfo?requestdata={"id":"%s"}"""
_json_video = self._download_json(url_or_request=_api_video % video_id, video_id=video_id)
info['data']['streaming']['data']['default'] = _json_video.get("source")
time.sleep(2)
if self._downloader.params.get("cookiefile"):
# TODO: if have vip account can get 320 or lossless media, default is 128
# TODO: Just support login with cookie file, use --cookies FILE.
api_download = self.get_api_with_signature(name_api='/download/get-streamings', video_id=video_id)
_json = self._download_json(url_or_request=api_download, video_id=video_id)
if _json and _json.get('msg') != 'Chỉ tài khoản VIP có thể tải bài hát này':
info['data']['streaming']['default'] = _json.get('data')
def get_lyric(data):
"""
- Lyric is description and subtitle for media, use --get-description or --all-subs to get.
:param data:
:return: str
"""
lyric = data.get('lyric') or try_get(data, lambda x: x['lyrics'][0]['content'])
if url_or_none(lyric):
lyric = self._download_webpage(url_or_request=lyric, video_id=video_id)
if lyric:
return lyric
def add_protocol(url):
if not url.startswith("http"):
return 'https:' + url
return url
def convert_thumbnail(url):
if url:
return re.sub(r'w[0-9]+', 'w1080', url)
if info.get('msg') == 'Success':
data = info.get('data')
title = data.get('title')
artists_names = data.get('artists_names')
thumbnail = convert_thumbnail(data.get('thumbnail'))
lyric = get_lyric(data)
like_count = int_or_none(data.get('like'))
comment_count = int_or_none(data.get('total_comment'))
view_count = int_or_none(data.get('listen'))
duration = int_or_none(data.get('duration'))
timestamp = int_or_none(data.get('timestamp'))
artists = [{
"id": i.get('id'),
"name": i.get('name'),
'link': compat_urllib_parse.urljoin(self._default_host, i.get('link')),
'thumbnail': convert_thumbnail(i.get('thumbnail')),
'follow_count': int_or_none(i.get('follow'))
} for i in data.get('artists', [])]
created_at = int_or_none(data.get('created_at'))
released_at = int_or_none(data.get('released_at'))
streaming = data.get('streaming')
if streaming.get('msg'):
if _type == 'video-clip':
stream_data = streaming.get('data', dict)
for protocol, stream in stream_data.items():
if protocol == 'default':
protocol = 'http'
for quality, url in stream.items():
if url and url != "ERROR":
if protocol == 'hls':
m3u8_url = add_protocol(url)
m3u8_doc = self._download_webpage(m3u8_url, video_id)
m3u8 = self._parse_m3u8_formats(m3u8_doc, m3u8_url)
for i in m3u8:
i['ext'] = "mp4"
formats.extend(m3u8)
elif protocol == 'http':
formats.append({
'url': add_protocol(url),
'ext': 'mp4',
'protocol': protocol,
'height': int_or_none(quality) or int_or_none(quality[:-1])
})
formats = sorted(formats, key=lambda x: x['height'])
else:
if streaming.get('msg') != "Success":
self.to_screen(
"Zingmp3_vn requires authentication."
"Because This media need VIP account to listen or watch."
"You may want to use --cookies FILE."
)
return
default = streaming.get('default')
for quality, url in default.items():
if url:
if quality == 'lossless':
formats.append({
'url': add_protocol(url),
'ext': 'flac',
'protocol': 'http'
})
else:
formats.append({
'url': add_protocol(url),
'ext': 'mp3',
'protocol': 'http'
})
return {
'id': video_id,
'title': title,
'artists_names': artists_names,
'artists': artists,
'thumbnail': thumbnail,
'description': lyric,
"subtitles": {
"lyric": [
{
"ext": "txt",
"data": lyric
}
],
},
'created_at': created_at,
'released_at': released_at,
'like_count': like_count,
'comment_count': comment_count,
'view_count': view_count,
'duration': duration,
'timestamp': timestamp,
'formats': formats
}
def get_api_with_signature(self, name_api, video_id='', alias='', _type='', new_release=False):
"""
- The api of this site has 1 param named sig => signature
- It uses the hash function of the variables ctime, id, and name_api.
- Sone api don't need id, just need ctime and name_api,
:param _type:
:param alias:
:param name_api:
:param video_id:
:param type:
:param new_release:
:return: api
"""
API_KEY = '38e8643fb0dc04e8d65b99994d3dafff'
SECRET_KEY = b'10a01dcf33762d3a204cb96429918ff6'
if not name_api:
return
_time = str(int(datetime.datetime.now().timestamp()))
def get_hash256(string):
return hashlib.sha256(string.encode('utf-8')).hexdigest()
def get_hmac512(string):
return hmac.new(SECRET_KEY, string.encode('utf-8'), hashlib.sha512).hexdigest()
def get_api_by_id(_id):
url = r"https://zingmp3.vn/api%s?id=%s&" % (name_api, _id)
sha256 = get_hash256(r"ctime=%sid=%s" % (_time, _id))
data = {
'ctime': _time,
'api_key': API_KEY,
'sig': get_hmac512(r"%s%s" % (name_api, sha256))
}
return url + compat_urllib_parse_urlencode(data)
def get_api_chart(_type):
url = r"https://zingmp3.vn/api%s?type=%s&" % (name_api, _type)
sha256 = get_hash256(r"ctime=%s" % _time)
data = {
'ctime': _time,
'api_key': API_KEY,
'sig': get_hmac512(r"%s%s" % (name_api, sha256))
}
return url + compat_urllib_parse_urlencode(data)
def get_api_new_release():
url = r"https://zingmp3.vn/api%s?" % name_api
sha256 = get_hash256(r"ctime=%s" % _time)
data = {
'ctime': _time,
'api_key': API_KEY,
'sig': get_hmac512(r"%s%s" % (name_api, sha256))
}
return url + compat_urllib_parse_urlencode(data)
def get_api_download(_id):
url = r"https://download.zingmp3.vn/api%s?id=%s&" % (name_api, _id)
sha256 = get_hash256(r"ctime=%sid=%s" % (_time, _id))
data = {
'ctime': _time,
'api_key': API_KEY,
'sig': get_hmac512(r"%s%s" % (name_api, sha256))
}
return url + compat_urllib_parse_urlencode(data)
def get_api_info_alias(alias):
url = r"https://zingmp3.vn/api%s?alias=%s&" % (name_api, alias)
sha256 = get_hash256(r"ctime=%s" % (_time))
data = {
'ctime': _time,
'api_key': API_KEY,
'sig': get_hmac512(r"%s%s" % (name_api, sha256))
}
return url + compat_urllib_parse_urlencode(data)
if 'download' in name_api:
return get_api_download(_id=video_id)
if alias:
return get_api_info_alias(alias)
if video_id:
return get_api_by_id(video_id)
if _type:
return get_api_chart(_type)
if new_release:
return get_api_new_release()
return
class Zingmp3_vnPlaylistIE(Zingmp3_vnIE):
IE_NAME = "zingmp3_vn:playlist"
_VALID_URL = r'''(?x)^
((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(zingmp3\.vn)
)\/(?P<type>album|playlist|chu-de)\/(?P<slug>.*?)\/(?P<playlist_id>.*?)\W
'''
_TESTS = [
{
'url': 'https://zingmp3.vn/album/Top-100-Nhac-Hoa-Hay-NhatVarious-Artists/ZWZB96EI.html',
"info_dict": {
'id': "ZWZB96EI",
'title': 'Top 100 Nhạc Hoa Hay Nhất'
},
"playlist_count": 100
},
{
"url": "https://zingmp3.vn/album/Chi-Co-The-La-Mr-Siro-Mr-Siro/ZBWIA9DI.html",
"info_dict": {
"id": "ZBWIA9DI",
"title": 'Chỉ Có Thể Là Mr Siro'
},
"playlist_mincount": 10
},
{
'url': 'https://zingmp3.vn/playlist/Sofm-s-playlist/IWE606EA.html',
"info_dict": {
'id': "IWE606EA",
'title': "Sofm's playlist"
},
"playlist_count": 69
},
{
"url": "https://zingmp3.vn/album/Top-100-Pop-Au-My-Hay-Nhat-Various-Artists/ZWZB96AB.html",
"info_dict": {
"id": "ZWZB96AB",
"title": "Top 100 Pop Âu Mỹ Hay Nhất"
},
"playlist_count": 100,
},
{
"url": "https://zingmp3.vn/chu-de/Acoustic/IWZ977C8.html",
"info_dict": {
"id": "IWZ977C8",
"title": "Acoustic"
},
"playlist_mincount": 3
},
{
"url": "https://zingmp3.vn/chu-de/Nhac-Khong-Loi/IWZ9ZIOI.html",
"info_dict": {
"id": "IWZ9ZIOI",
"title": "Nhac-Khong-Loi"
},
"playlist_mincount": 10
}
]
name_api_album_or_playlist = '/playlist/get-playlist-detail'
name_api_topic = "/topic/get-detail"
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
_type = mobj.group('type')
playlist_id = mobj.group('playlist_id')
slug = mobj.group('slug')
if _type == 'chu-de':
return self.playlist_result(
entries=self._entries_for_chu_de(id_chu_de=playlist_id),
playlist_id=playlist_id,
playlist_title=slug
)
return self._extract_playlist(id_playlist=playlist_id)
def _entries_for_chu_de(self, id_chu_de):
self.IE_NAME = "zingmp3_vn:chu-de"
api = self.get_api_with_signature(name_api=self.name_api_topic, video_id=id_chu_de)
info = self._download_json(url_or_request=api, video_id=id_chu_de)
if info.get('msg') != "Success":
return
items = try_get(info, lambda x: x['data']['playlist']['items'])
for item in items:
url = compat_urllib_parse.urljoin(self._default_host, item.get('link'))
media_id = item.get('id')
if 'album' in url or 'playlist' in url:
name_api = '/playlist/get-playlist-detail'
api = self.get_api_with_signature(name_api=name_api, video_id=media_id)
info_playlist = self._download_json(url_or_request=api, video_id=media_id)
items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items'])
for item_pl in items_playlist:
url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link'))
video_id = item_pl.get('id')
yield self.url_result(
url=url,
ie=Zingmp3_vnIE.ie_key(),
video_id=video_id
)
def _extract_playlist(self, id_playlist):
api = self.get_api_with_signature(name_api=self.name_api_album_or_playlist, video_id=id_playlist)
info = self._download_json(url_or_request=api, video_id=id_playlist)
title_playlist = try_get(info, lambda x: x['data']['title'])
items = try_get(info, lambda x: x['data']['song']['items'])
entries = []
for item in items:
url = compat_urllib_parse.urljoin(self._default_host, item.get('link'))
video_id = item.get('id')
entry = self.url_result(
url=url,
ie=Zingmp3_vnIE.ie_key(),
video_id=video_id
)
entries.append(entry)
return {
'_type': 'playlist',
'id': id_playlist,
'title': title_playlist,
'entries': entries,
}
class Zingmp3_vnChartIE(Zingmp3_vnIE):
IE_NAME = "zingmp3_vn:#zingchart"
_VALID_URL = r'''(?x)^
((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(zingmp3\.vn)
)\/(?P<name>zing-chart-tuan|zing-chart|top-new-release)\/
(?P<slug_name>.*?)(\.|\/)(?P<id_name>.*?\.)?
'''
_TESTS = [
{
"url": "https://zingmp3.vn/zing-chart/bai-hat.html",
"info_dict": {
"title": "zing-chart-bai-hat",
},
"playlist_count": 100
},
{
"url": "https://zingmp3.vn/zing-chart/video.html",
"info_dict": {
"title": "zing-chart-video",
},
"playlist_count": 100
},
{
"url": "https://zingmp3.vn/zing-chart-tuan/bai-hat-US-UK/IWZ9Z0BW.html",
"info_dict": {
"title": "zing-chart-tuan-bai-hat-US-UK"
},
"playlist_mincount": 20
},
{
"url": "https://zingmp3.vn/zing-chart-tuan/video-Kpop/IWZ9Z0BZ.html",
"info_dict": {
"title": "zing-chart-tuan-video-Kpop"
},
"playlist_mincount": 20
},
{
"url": "https://zingmp3.vn/top-new-release/index.html",
"info_dict": {
"title": "top-new-release-index"
},
"playlist_count": 100
}
]
list_name_api = {
'zing-chart': {
'name': '/chart-realtime/get-detail',
'bai-hat': 'song',
'index': 'song',
'video': 'video',
},
'zing-chart-tuan': {
'name': '/chart/get-chart',
},
'top-new-release': {
'name': '/chart/get-chart-new-release'
}
}
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
name = mobj.group('name')
slug_name = mobj.group('slug_name')
if name == 'zing-chart':
api = self.get_api_with_signature(
name_api=self.list_name_api.get(name).get('name'),
_type=self.list_name_api.get(name).get(slug_name)
)
elif name == 'zing-chart-tuan':
api = self.get_api_with_signature(
name_api=self.list_name_api.get(name).get('name'),
video_id=mobj.group('id_name')
)
else:
api = self.get_api_with_signature(
name_api=self.list_name_api.get(name).get('name'),
new_release=True
)
count = 0
info = None
while count != 3:
webpage = self._download_webpage(url_or_request=api, video_id=name)
if webpage:
info = self._parse_json(webpage, name, transform_source=js_to_json)
break
count += 1
if info:
return self.playlist_result(
entries=self._entries(try_get(info, lambda x: x['data']['items'])),
playlist_title=r"%s-%s" % (name, slug_name)
)
def _entries(self, items):
for item in items:
url = compat_urllib_parse.urljoin(self._default_host, item.get('link'))
video_id = item.get('id')
yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id)
class Zingmp3_vnUserIE(Zingmp3_vnIE):
_VALID_URL = r'''(?x)^
((http[s]?|fpt):)\/?\/(www\.|m\.|)
(?P<site>
(zingmp3\.vn)
)\/(?P<nghe_si>nghe-si\/|)(?P<name>.*?)
(?:$|\/)
(?P<slug_name>bai-hat|album|video|playlist)$
'''
IE_NAME = "zingmp3_vn:user"
_TESTS = [
{
'url': "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/bai-hat",
"info_dict": {
"id": "IWZ9CUWA",
"title": "Huong-Giang-Idol-bai-hat",
},
"playlist_mincount": 5
},
{
"url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/album",
"info_dict": {
"id": "IWZ9CUWA",
"title": "Huong-Giang-Idol-album",
},
"playlist_mincount": 5
},
{
"url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/video",
"info_dict": {
"id": "IWZ9CUWA",
"title": "Huong-Giang-Idol-video"
},
"playlist_mincount": 5
},
{
"url": "https://zingmp3.vn/Mr-Siro/bai-hat",
"info_dict": {
"id": "IWZ98609",
"title": "Mr-Siro-bai-hat",
},
"playlist_mincount": 5
},
{
"url": "https://zingmp3.vn/Mr-Siro/playlist",
"info_dict": {
"id": "IWZ98609",
"title": "Mr-Siro-playlist",
},
"playlist_mincount": 5
},
{
"url": "https://zingmp3.vn/Mr-Siro/video",
"info_dict": {
"id": "IWZ98609",
"title": "Mr-Siro-video",
},
"playlist_mincount": 5
},
]
list_name_api_user = {
'bai-hat': "/song/get-list",
"playlist": "/playlist/get-list",
"album": "/playlist/get-list",
"video": "/video/get-list",
}
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
name = mobj.group('name')
slug_name = mobj.group('slug_name')
nghe_si = mobj.group('nghe_si')
if not slug_name:
slug_name = "bai-hat"
name_api = self.list_name_api_user.get(slug_name) or None
self.id_artist = None
if nghe_si:
webpage = self._download_webpage(url_or_request=r"https://mp3.zing.vn/nghe-si/%s" % name, video_id=name)
self.id_artist = self._search_regex(r'''(?x)
\<a.*?tracking=\"\_frombox=artist_artistfollow\"
\s+data-id=\"(?P<id_artist>.*?)\"
\s+data-type=\"(?P<data_type>.*?)\"
\s+data-name=\"(?P<data_name>.*?)\".*?\>
''', webpage, "artist id", group="id_artist")
else:
api = self.get_api_with_signature(name_api="/oa/get-artist-info", alias=name)
info = self._download_json(url_or_request=api, video_id=name)
if info.get('msg') == 'Success':
self.id_artist = try_get(info, lambda x: x['data']['artist_id'])
if self.id_artist:
self.api = self.get_api_with_signature(name_api=name_api, video_id=self.id_artist)
return self.playlist_result(
entries=self._entries(),
playlist_id=self.id_artist,
playlist_title=r"%s-%s" % (name, slug_name)
)
elif name == 'chu-de':
self.IE_NAME = "zingmp3_vn:chu-de"
rex = re.match(r"(?P<name_chu_de>.*)\/(?P<id_chu_de>.*?)\.", slug_name)
name_chu_de = rex.group('name_chu_de')
self.id_chu_de = rex.group('id_chu_de')
self.api = self.get_api_with_signature(name_api="/topic/get-detail", video_id=self.id_chu_de)
return self.playlist_result(
entries=self._entries_for_chu_de(),
playlist_id=self.id_chu_de,
playlist_title=name_chu_de,
)
def _entries_for_chu_de(self):
info = self._download_json(url_or_request=self.api, video_id=self.id_chu_de)
if info.get('msg') != "Success":
return
items = try_get(info, lambda x: x['data']['playlist']['items'])
for item in items:
url = compat_urllib_parse.urljoin(self._default_host, item.get('link'))
media_id = item.get('id')
if 'album' in url or 'playlist' in url:
name_api = '/playlist/get-playlist-detail'
api = self.get_api_with_signature(name_api=name_api, video_id=media_id)
info_playlist = self._download_json(url_or_request=api, video_id=media_id)
items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items'])
for item_pl in items_playlist:
url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link'))
video_id = item_pl.get('id')
yield self.url_result(
url=url,
ie=Zingmp3_vnIE.ie_key(),
video_id=video_id
)
def _entries(self):
start = 0
count = 30
while True:
info = self._download_json(url_or_request=self.api, video_id=self.id_artist, query={
'type': 'artist',
'start': start,
'count': count,
'sort': 'hot'
})
if info.get('msg').lower() != "success":
break
items = try_get(info, lambda x: x['data']['items'])
for item in items:
url = compat_urllib_parse.urljoin(self._default_host, item.get('link'))
media_id = item.get('id')
if 'album' in url or 'playlist' in url:
name_api = '/playlist/get-playlist-detail'
api = self.get_api_with_signature(name_api=name_api, video_id=media_id)
info_playlist = self._download_json(url_or_request=api, video_id=media_id)
items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items'])
for item_pl in items_playlist:
url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link'))
video_id = item_pl.get('id')
yield self.url_result(
url=url,
ie=Zingmp3_vnIE.ie_key(),
video_id=video_id
)
else:
yield self.url_result(
url=url,
ie=Zingmp3_vnIE.ie_key(),
video_id=media_id
)
total = int_or_none(try_get(info, lambda x: x['data']['total']))
start += count
if total <= start:
break