From 7642b142b406bbc630bfaa38a6e34e10496360f5 Mon Sep 17 00:00:00 2001 From: ZerataX Date: Wed, 19 Dec 2018 00:04:03 +0100 Subject: [PATCH] [soundgasm] add internet archive support --- youtube_dl/extractor/soundgasm.py | 57 +++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index 3d78a9d76..e83e2c0f0 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -8,18 +8,33 @@ from .common import InfoExtractor class SoundgasmIE(InfoExtractor): IE_NAME = 'soundgasm' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' - _TEST = { - 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', - 'md5': '010082a2c802c5275bb00030743e75ad', - 'info_dict': { - 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', - 'ext': 'm4a', - 'title': 'Piano sample', - 'description': 'Royalty Free Sample Music', - 'uploader': 'ytdl', + _VALID_URL = r'(?P<archive>https?://web\.archive\.org/web/\d+/)?' 
+ \ + r'https?://(?:www\.)?soundgasm\.net(?::80)?/u/' + \ + r'(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' + _TESTS = [{ + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', + } + }, + { + 'url': 'http://web.archive.org/web/20181218221507/' + + 'https://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,6 +46,9 @@ class SoundgasmIE(InfoExtractor): r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'audio URL', group='url') + if mobj.group('archive'): + audio_url = audio_url[:41] + "if_" + audio_url[41:] + title = self._search_regex( r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', webpage, 'title', default=display_id) @@ -56,14 +74,25 @@ class SoundgasmIE(InfoExtractor): class SoundgasmProfileIE(InfoExtractor): IE_NAME = 'soundgasm:profile' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' - _TEST = { + _VALID_URL = r'(?P<archive>https?://web\.archive\.org/web/\d+/)?' + \ + r'https?://(?:www\.)?soundgasm\.net/u/' + \ + r'(?P<id>[^/]+)/?(?:\#.*)?$' + _TESTS = [{ 'url': 'http://soundgasm.net/u/ytdl', 'info_dict': { 'id': 'ytdl', }, 'playlist_count': 1, - } + }, + { + 'url': 'http://web.archive.org/web/20181218222843/' + + 'https://soundgasm.net/u/ytdl', + 'info_dict': { + 'id': 'ytdl', + }, + 'playlist_count': 1, + } + ] def _real_extract(self, url): profile_id = self._match_id(url)