1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-02-16 17:07:54 +01:00

Remove long lines, relax requirements on optional fields, and simplify regexes.

This commit is contained in:
rubyist 2020-03-02 17:48:22 -08:00
parent c1020cf113
commit 19ba4ec218

View File

@@ -32,31 +32,40 @@ class MatterIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
track_id = self._match_id(url) track_id = self._match_id(url)
webpage = self._download_webpage(
"https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id
)
author = self._search_regex( # Fetch page with metadata and download URLs.
r'<a href="https://app.matter.online/artists/[^"]+" target="[^"]+">([^<]+)</a>', api = "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded"
webpage, "author" webpage = self._download_webpage(api % track_id, track_id)
)
# Extract required fields
title = self._search_regex( title = self._search_regex(
r'<a href="https://app.matter.online/tracks/\d+" target="[^"]+">([^<]+)</a>', r'tracks/\d+" target="[^"]+">([^<]+)</a>',
webpage, "title" webpage, "title"
) )
download_url = self._search_regex( download_url = self._search_regex(
r'<source src="(https://matter-production.s3.amazonaws.com/audios/[^\.]+\.[^"]+)"/>', r'(https://[^/]+/audios/[^\.]+\.[^"]+)"/>',
webpage, "download_url" webpage, "download_url"
) )
artwork = self._search_regex(
r'style="background: url\((https://matter-production.s3.amazonaws.com/images/[^\.]+\.[^\)]+)\)',
webpage, "artwork"
)
return { extracted = {
'id': track_id, 'id': track_id,
'url': download_url, 'url': download_url,
'title': title, 'title': title,
'uploader': author,
'thumbnail': artwork,
} }
# Extract optional fields
author = self._search_regex(
r'artists/[^"]+" target="[^"]+">([^<]+)</a>',
webpage, "author", fatal=False
)
artwork = self._search_regex(
r'(https://[^/]+/images/[^\.]+\.[^\)]+)\)',
webpage, "artwork", fatal=False
)
if artwork:
extracted['thumbnail'] = artwork
if author:
extracted['uploader'] = author
return extracted