mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2025-02-17 01:17:54 +01:00
Reduce amount of variables
This commit is contained in:
parent
fdaae7c2a4
commit
4b4839cba6
@ -28,48 +28,36 @@ class BoundHubIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
webpage = self._download_webpage(url, self._match_id(url))
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
# Parse duration
|
# Parse duration
|
||||||
duration_text = self._search_regex(r'<span>\s*Duration:\s*<em>([\w ]*)</em>', webpage, 'duration_text', fatal=False)
|
duration_text = self._search_regex(r'<span>\s*Duration:\s*<em>([\w ]*)</em>', webpage, 'duration_text', fatal=False)
|
||||||
minutes = self._html_search_regex(r'(\d*)min', duration_text, 'minutes', fatal=False)
|
minutes = self._html_search_regex(r'(\d*)min', duration_text, 'minutes', fatal=False)
|
||||||
seconds = self._html_search_regex(r'(\d*)sec', duration_text, 'seconds', fatal=False)
|
seconds = self._html_search_regex(r'(\d*)sec', duration_text, 'seconds', fatal=False)
|
||||||
duration = (int(minutes) * 60) + int(seconds)
|
|
||||||
|
|
||||||
# Parse views
|
# Get uploader url
|
||||||
views_text = self._search_regex(r'<span>\s*Views:\s*<em>([\w ]*)</em>', webpage, 'views_text', fatal=False)
|
|
||||||
views = int_or_none(views_text.replace(' ', ''))
|
|
||||||
|
|
||||||
# Get uploader url and id
|
|
||||||
uploader_url = self._search_regex(r'<div\s*class=[\"\']username[\"\']>\s*<a href=[\"\']([^\"\']*)[\"\']', webpage, 'uploader_url', fatal=False)
|
uploader_url = self._search_regex(r'<div\s*class=[\"\']username[\"\']>\s*<a href=[\"\']([^\"\']*)[\"\']', webpage, 'uploader_url', fatal=False)
|
||||||
uploader_id = self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/members/(\d+)', uploader_url, 'uploader_id', fatal=False)
|
|
||||||
uploader_id = int_or_none(uploader_id)
|
|
||||||
|
|
||||||
# Get screenshots
|
# Get screenshots
|
||||||
html_screenshots = self._search_regex(r'<div\s*class=[\"\']block-screenshots[\"\']>([\s\S]+?)</div>', webpage, 'html_screenshots', fatal=False)
|
|
||||||
regex_screenshots = r'<a href=[\"\']([^\"\']*)[\"\']'
|
|
||||||
|
|
||||||
thumbnails = list()
|
thumbnails = list()
|
||||||
|
for match in re.findall(r'<a href=[\"\']([^\"\']*)[\"\']', self._search_regex(r'<div\s*class=[\"\']block-screenshots[\"\']>([\s\S]+?)</div>', webpage, 'html_screenshots', fatal=False)):
|
||||||
for match in re.findall(regex_screenshots, html_screenshots):
|
|
||||||
img = dict()
|
img = dict()
|
||||||
img['url'] = match.rstrip('/')
|
img['url'] = match.rstrip('/')
|
||||||
img['id'] = int_or_none(os.path.splitext(os.path.basename(img['url']))[0])
|
img['id'] = int_or_none(os.path.splitext(os.path.basename(img['url']))[0])
|
||||||
thumbnails.append(img)
|
thumbnails.append(img)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': self._match_id(url),
|
||||||
'title': self._search_regex(r'<div\s*class=[\"\']headline[\"\']>\s*<h2>(.*)</h2>', webpage, 'title', default=None) or self._og_search_title(webpage),
|
'title': self._search_regex(r'<div\s*class=[\"\']headline[\"\']>\s*<h2>(.*)</h2>', webpage, 'title', default=None) or self._og_search_title(webpage),
|
||||||
'url': self._search_regex(r'video_url: [\"\']([^\"\']*)[\"\']', webpage, 'url'),
|
'url': self._search_regex(r'video_url: [\"\']([^\"\']*)[\"\']', webpage, 'url'),
|
||||||
'description': self._search_regex(r'<div\s*class=[\"\']item[\"\']>\s*Description:\s*<em>(.*)<\/em>', webpage, 'description', fatal=False),
|
'description': self._search_regex(r'<div\s*class=[\"\']item[\"\']>\s*Description:\s*<em>(.*)<\/em>', webpage, 'description', fatal=False),
|
||||||
'display_id': self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/videos/[0-9]+/([\w-]*)', url, 'display_id', fatal=False),
|
'display_id': self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/videos/[0-9]+/([\w-]*)', url, 'display_id', fatal=False),
|
||||||
'duration': duration,
|
'duration': (int(minutes) * 60) + int(seconds),
|
||||||
'ext': self._html_search_regex(r'postfix:\s*[\"\']\.([^\"\']*)[\"\']', webpage, 'ext', fatal=False),
|
'ext': self._html_search_regex(r'postfix:\s*[\"\']\.([^\"\']*)[\"\']', webpage, 'ext', fatal=False),
|
||||||
'thumbnail': self._html_search_regex(r'preview_url:\s*[\"\']([^\"\']*)[\"\']', webpage, 'thumbnail', fatal=False),
|
'thumbnail': self._html_search_regex(r'preview_url:\s*[\"\']([^\"\']*)[\"\']', webpage, 'thumbnail', fatal=False),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'uploader': self._search_regex(r'<div\s*class=[\"\']username[\"\']>\s*<a.*>\s*(.*)\s*</a>', webpage, 'uploader', fatal=False),
|
'uploader': self._search_regex(r'<div\s*class=[\"\']username[\"\']>\s*<a.*>\s*(.*)\s*</a>', webpage, 'uploader', fatal=False),
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': int_or_none(self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/members/(\d+)', uploader_url, 'uploader_id', fatal=False)),
|
||||||
'uploader_url': uploader_url,
|
'uploader_url': uploader_url,
|
||||||
'views': views,
|
'views': int_or_none(self._search_regex(r'<span>\s*Views:\s*<em>([\w ]*)</em>', webpage, 'views_text', fatal=False).replace(' ', '')),
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user