1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 08:34:32 +01:00

[tiktok] rewrite for new API and website

This commit is contained in:
Stephen Fluin 2020-08-08 01:03:33 -07:00
parent a4ed50bb84
commit b78835f384
2 changed files with 32 additions and 126 deletions

View File

@ -1143,10 +1143,7 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import ( from .tiktok import TikTokIE
TikTokIE,
TikTokUserIE,
)
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
from .tmz import ( from .tmz import (
TMZIE, TMZIE,

View File

@ -1,138 +1,47 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
ExtractorError,
int_or_none, int_or_none,
str_or_none,
try_get,
url_or_none,
) )
class TikTokBaseIE(InfoExtractor): class TikTokIE(InfoExtractor):
def _extract_aweme(self, data): _VALID_URL = r'https?://(?:www\.)?tiktok\.com/.*/video/(?P<id>[^/?#&]+)'
video = data['video']
description = str_or_none(try_get(data, lambda x: x['desc']))
width = int_or_none(try_get(data, lambda x: video['width']))
height = int_or_none(try_get(data, lambda x: video['height']))
format_urls = set()
formats = []
for format_id in (
'play_addr_lowbr', 'play_addr', 'play_addr_h264',
'download_addr'):
for format in try_get(
video, lambda x: x[format_id]['url_list'], list) or []:
format_url = url_or_none(format)
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'ext': 'mp4',
'height': height,
'width': width,
})
self._sort_formats(formats)
thumbnail = url_or_none(try_get(
video, lambda x: x['cover']['url_list'][0], compat_str))
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
timestamp = int_or_none(data.get('create_time'))
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
try_get(data, lambda x: x['statistics']['comment_count']))
repost_count = int_or_none(try_get(
data, lambda x: x['statistics']['share_count']))
aweme_id = data['aweme_id']
return {
'id': aweme_id,
'title': uploader or aweme_id,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'timestamp': timestamp,
'comment_count': comment_count,
'repost_count': repost_count,
'formats': formats,
}
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:m\.)?tiktok\.com/v|
(?:www\.)?tiktok\.com/share/video
)
/(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'url': 'https://www.tiktok.com/@ben_brainard/video/6858321293117312262',
'md5': 'd584b572e92fcd48888051f238022420', # 'md5': 'd584b572e92fcd48888051f238022420',
'info_dict': { 'info_dict': {
'id': '6606727368545406213', 'id': '6858321293117312262',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Zureeal', 'title': 'ben_brainard',
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay', 'description': 'Hurricane... hurricane... hurricane... I got nothing.',
'thumbnail': r're:^https?://.*~noop.image', 'uploader': 'ben_brainard',
'uploader': 'Zureeal', 'timestamp': 1596827366,
'timestamp': 1538248586, 'upload_date': '20200807',
'upload_date': '20180929', 'comment_count': int
'comment_count': int,
'repost_count': int,
} }
}, {
'url': 'https://www.tiktok.com/share/video/6606727368545406213',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, video_id)
'https://m.tiktok.com/v/%s.html' % video_id, video_id) # print webpage
data = self._parse_json(self._search_regex( description = self._search_regex(r'<title[^>]*>([^<]+)</title>', webpage, 'description')
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) mediaUrl = self._search_regex(r'"video":{"urls":\["([^"]+)"', webpage, 'url')
return self._extract_aweme(data) # data = self._parse_html5_media_entries(url,webpage,video_id)
uploader = self._search_regex(r'"authorName":"([^"]+)"', webpage, 'uploader')
commentCount = self._search_regex(r'"commentCount":([^,]+)', webpage, 'comment_count')
viewCount = self._search_regex(r'"playCount":([^,]+)', webpage, 'view_count')
timestamp = self._search_regex(r'"createTime":"([^"]+)"', webpage, 'timestamp')
return {
class TikTokUserIE(TikTokBaseIE): 'id': video_id,
_VALID_URL = r'''(?x) 'url': mediaUrl,
https?:// 'title': uploader,
(?: 'description': description,
(?:m\.)?tiktok\.com/h5/share/usr| 'ext': 'mp4',
(?:www\.)?tiktok\.com/share/user 'uploader': uploader,
) 'comment_count': int_or_none(commentCount),
/(?P<id>\d+) 'view_count': int_or_none(viewCount),
''' 'timestamp': int_or_none(timestamp)
_TESTS = [{ }
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
}, {
'url': 'https://www.tiktok.com/share/user/188294915489964032',
'only_matching': True,
}]
def _real_extract(self, url):
user_id = self._match_id(url)
data = self._download_json(
'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
query={'_signature': '_'})
entries = []
for aweme in data['aweme_list']:
try:
entry = self._extract_aweme(aweme)
except ExtractorError:
continue
entry['extractor_key'] = TikTokIE.ie_key()
entries.append(entry)
return self.playlist_result(entries, user_id)