[soundgasm] add internet archive support

2024-11-23 08:54:33 +01:00 · 2018-12-19 00:04:03 +01:00 · 2018-12-19 00:04:03 +01:00 · 7642b142b4
commit 7642b142b4
parent 65e29cdac3
1 changed files with 43 additions and 14 deletions
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dl/extractor/soundgasm.py
@@ -8,18 +8,33 @@ from .common import InfoExtractor
 class SoundgasmIE(InfoExtractor):
    IE_NAME = 'soundgasm'
-    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
+    _VALID_URL = r'(?P<archive>https?://web\.archive\.org/web/\d+/)?' + \
-    _TEST = {
+                 r'https?://(?:www\.)?soundgasm\.net(?::80)?/u/' + \
-        'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
+                 r'(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
-        'md5': '010082a2c802c5275bb00030743e75ad',
+    _TESTS = [{
-        'info_dict': {
+            'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
-            'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
+            'md5': '010082a2c802c5275bb00030743e75ad',
-            'ext': 'm4a',
+            'info_dict': {
-            'title': 'Piano sample',
+                'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
-            'description': 'Royalty Free Sample Music',
+                'ext': 'm4a',
-            'uploader': 'ytdl',
+                'title': 'Piano sample',
                'description': 'Royalty Free Sample Music',
                'uploader': 'ytdl',
            }
        },
        {
            'url': 'http://web.archive.org/web/20181218221507/' +
                   'https://soundgasm.net/u/ytdl/Piano-sample',
            'md5': '010082a2c802c5275bb00030743e75ad',
            'info_dict': {
                'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
                'ext': 'm4a',
                'title': 'Piano sample',
                'description': 'Royalty Free Sample Music',
                'uploader': 'ytdl',
            }
        }
-    }
+    ]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -31,6 +46,9 @@ class SoundgasmIE(InfoExtractor):
            r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'audio URL', group='url')
        if mobj.group('archive'):
            audio_url = audio_url[:41] + "if_" + audio_url[41:]
        title = self._search_regex(
            r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
            webpage, 'title', default=display_id)
@@ -56,14 +74,25 @@ class SoundgasmIE(InfoExtractor):
 class SoundgasmProfileIE(InfoExtractor):
    IE_NAME = 'soundgasm:profile'
-    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+    _VALID_URL = r'(?P<archive>https?://web\.archive\.org/web/\d+/)?' + \
-    _TEST = {
+                 r'https?://(?:www\.)?soundgasm\.net/u/' + \
                 r'(?P<id>[^/]+)/?(?:\#.*)?$'
    _TESTS = [{
        'url': 'http://soundgasm.net/u/ytdl',
        'info_dict': {
            'id': 'ytdl',
        },
        'playlist_count': 1,
-    }
+        },
        {
            'url': 'http://web.archive.org/web/20181218222843/' +
                   'https://soundgasm.net/u/ytdl',
            'info_dict': {
                'id': 'ytdl',
            },
            'playlist_count': 1,
        }
    ]
    def _real_extract(self, url):
        profile_id = self._match_id(url)