From 19ba4ec21833fccd3bb6f3b441a6afa24418a6ee Mon Sep 17 00:00:00 2001 From: rubyist Date: Mon, 2 Mar 2020 17:48:22 -0800 Subject: [PATCH] Remove long lines, relax reqs on optional fields, and simplify regexes. --- youtube_dl/extractor/matter.py | 41 +++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py index a0e96d278..2295cafa5 100644 --- a/youtube_dl/extractor/matter.py +++ b/youtube_dl/extractor/matter.py @@ -32,31 +32,40 @@ class MatterIE(InfoExtractor): def _real_extract(self, url): track_id = self._match_id(url) - webpage = self._download_webpage( - "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id - ) - author = self._search_regex( - r'([^<]+)', - webpage, "author" - ) + # Fetch page with metadata and download URLs. + api = "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" + webpage = self._download_webpage(api % track_id, track_id) + + # Extract required fields title = self._search_regex( - r'([^<]+)', + r'tracks/\d+" target="[^"]+">([^<]+)', webpage, "title" ) download_url = self._search_regex( - r'', + r'(https://[^/]+/audios/[^\.]+\.[^"]+)"/>', webpage, "download_url" ) - artwork = self._search_regex( - r'style="background: url\((https://matter-production.s3.amazonaws.com/images/[^\.]+\.[^\)]+)\)', - webpage, "artwork" - ) - return { + extracted = { 'id': track_id, 'url': download_url, 'title': title, - 'uploader': author, - 'thumbnail': artwork, } + + # Extract optional fields + author = self._search_regex( + r'artists/[^"]+" target="[^"]+">([^<]+)', + webpage, "author", fatal=False + ) + artwork = self._search_regex( + r'(https://[^/]+/images/[^\.]+\.[^\)]+)\)', + webpage, "artwork", fatal=False + ) + + if artwork: + extracted['thumbnail'] = artwork + if author: + extracted['uploader'] = author + + return extracted