diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index f14b407dc..275d44ad7 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,86 +4,80 @@ import random import re import time +from ..compat import compat_str, compat_urlparse +from ..utils import (KNOWN_EXTENSIONS, ExtractorError, float_or_none, + int_or_none, parse_filesize, str_or_none, try_get, + unescapeHTML, unified_strdate, unified_timestamp, + update_url_query, url_or_none) from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - KNOWN_EXTENSIONS, - parse_filesize, - str_or_none, - try_get, - unescapeHTML, - update_url_query, - unified_strdate, - unified_timestamp, - url_or_none, -) class BandcampIE(InfoExtractor): - _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' - _TESTS = [{ - 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', - 'md5': 'c557841d5e50261777a6585648adf439', - 'info_dict': { - 'id': '1812978515', - 'ext': 'mp3', - 'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", - 'duration': 9.8485, + _VALID_URL = r"https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)" + _TESTS = [ + { + "url": "http://youtube-dl.bandcamp.com/track/youtube-dl-test-song", + "md5": "c557841d5e50261777a6585648adf439", + "info_dict": { + "id": "1812978515", + "ext": "mp3", + "title": ( + "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song" + " \"'/\\\u00e4\u21ad" + ), + "duration": 9.8485, + }, + "_skip": "There is a limit of 200 free downloads / month for the test song", }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' - }, { - # free download - 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', - 'md5': '853e35bf34aa1d6fe2615ae612564b36', - 'info_dict': { - 'id': '2650410135', - 'ext': 'aiff', - 'title': 'Ben Prunty - Lanius 
(Battle)', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Ben Prunty', - 'timestamp': 1396508491, - 'upload_date': '20140403', - 'release_date': '20140403', - 'duration': 260.877, - 'track': 'Lanius (Battle)', - 'track_number': 1, - 'track_id': '2650410135', - 'artist': 'Ben Prunty', - 'album': 'FTL: Advanced Edition Soundtrack', + { + # free download + "url": "http://benprunty.bandcamp.com/track/lanius-battle", + "md5": "853e35bf34aa1d6fe2615ae612564b36", + "info_dict": { + "id": "2650410135", + "ext": "aiff", + "title": "Ben Prunty - Lanius (Battle)", + "thumbnail": r"re:^https?://.*\.jpg$", + "uploader": "Ben Prunty", + "timestamp": 1396508491, + "upload_date": "20140403", + "release_date": "20140403", + "duration": 260.877, + "track": "Lanius (Battle)", + "track_number": 1, + "track_id": "2650410135", + "artist": "Ben Prunty", + "album": "FTL: Advanced Edition Soundtrack", + }, }, - }, { - # no free download, mp3 128 - 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire', - 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7', - 'info_dict': { - 'id': '2584466013', - 'ext': 'mp3', - 'title': 'Mastodon - Hail to Fire', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Mastodon', - 'timestamp': 1322005399, - 'upload_date': '20111122', - 'release_date': '20040207', - 'duration': 120.79, - 'track': 'Hail to Fire', - 'track_number': 5, - 'track_id': '2584466013', - 'artist': 'Mastodon', - 'album': 'Call of the Mastodon', + { + # no free download, mp3 128 + "url": "https://relapsealumni.bandcamp.com/track/hail-to-fire", + "md5": "fec12ff55e804bb7f7ebeb77a800c8b7", + "info_dict": { + "id": "2584466013", + "ext": "mp3", + "title": "Mastodon - Hail to Fire", + "thumbnail": r"re:^https?://.*\.jpg$", + "uploader": "Mastodon", + "timestamp": 1322005399, + "upload_date": "20111122", + "release_date": "20040207", + "duration": 120.79, + "track": "Hail to Fire", + "track_number": 5, + "track_id": "2584466013", + "artist": "Mastodon", + "album": "Call of the 
Mastodon", + }, }, - }] + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - title = mobj.group('title') + title = mobj.group("title") webpage = self._download_webpage(url, title) - thumbnail = self._html_search_meta('og:image', webpage, default=None) + thumbnail = self._html_search_meta("og:image", webpage, default=None) track_id = None track = None @@ -91,218 +85,250 @@ class BandcampIE(InfoExtractor): duration = None formats = [] - track_info = self._parse_json( - self._search_regex( - r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n', - webpage, 'track info', default='{}'), title) + trackinfo_block = self._search_regex( + r"trackinfo&quot;:\[\s*({.+?})\s*\],&quot;", + webpage, + "track info", + default="{}", + ) + quoted_json = trackinfo_block.replace("&quot;", '"') + track_info = self._parse_json(quoted_json, title) if track_info: - file_ = track_info.get('file') + file_ = track_info.get("file") if isinstance(file_, dict): for format_id, format_url in file_.items(): if not url_or_none(format_url): continue - ext, abr_str = format_id.split('-', 1) - formats.append({ - 'format_id': format_id, - 'url': self._proto_relative_url(format_url, 'http:'), - 'ext': ext, - 'vcodec': 'none', - 'acodec': ext, - 'abr': int_or_none(abr_str), - }) - track = track_info.get('title') - track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) - track_number = int_or_none(track_info.get('track_num')) - duration = float_or_none(track_info.get('duration')) + ext, abr_str = format_id.split("-", 1) + formats.append( + { + "format_id": format_id, + "url": self._proto_relative_url(format_url, "http:"), + "ext": ext, + "vcodec": "none", + "acodec": ext, + "abr": int_or_none(abr_str), + } + ) + track = track_info.get("title") + track_id = str_or_none(track_info.get("track_id") or track_info.get("id")) + track_number = int_or_none(track_info.get("track_num")) + duration = float_or_none(track_info.get("duration")) + # 
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key, def extract(key): return self._search_regex( - r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key, - webpage, key, default=None, group='value') + r",&quot;%s&quot;:(&quot;)(?P<value>(?:(?!&quot;).)+)&quot;" % key, + webpage, + key, + default=None, + group="value", + ) - artist = extract('artist') - album = extract('album_title') + artist = extract("artist") + + album = extract("album_title") timestamp = unified_timestamp( - extract('publish_date') or extract('album_publish_date')) - release_date = unified_strdate(extract('album_release_date')) + extract("publish_date") or extract("album_publish_date") + ) + release_date = unified_strdate(extract("album_release_date")) download_link = self._search_regex( - r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'download link', default=None, group='url') + r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, + "download link", + default=None, + group="url", + ) if download_link: track_id = self._search_regex( - r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', - webpage, 'track id') + r"(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$", + webpage, + "track id", + ) download_webpage = self._download_webpage( - download_link, track_id, 'Downloading free downloads page') + download_link, track_id, "Downloading free downloads page" + ) blob = self._parse_json( self._search_regex( - r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, - 'blob', group='blob'), - track_id, transform_source=unescapeHTML) + r'data-blob=(["\'])(?P<blob>{.+?})\1', + download_webpage, + "blob", + group="blob", + ), + track_id, + transform_source=unescapeHTML, + ) info = try_get( - blob, (lambda x: x['digital_items'][0], - lambda x: x['download_items'][0]), dict) + blob, + (lambda x: x["digital_items"][0], lambda x: x["download_items"][0]), + dict, + ) if info: - downloads = info.get('downloads') + downloads = info.get("downloads") if isinstance(downloads, 
dict): if not track: - track = info.get('title') + track = info.get("title") if not artist: - artist = info.get('artist') + artist = info.get("artist") if not thumbnail: - thumbnail = info.get('thumb_url') + thumbnail = info.get("thumb_url") download_formats = {} - download_formats_list = blob.get('download_formats') + download_formats_list = blob.get("download_formats") if isinstance(download_formats_list, list): - for f in blob['download_formats']: - name, ext = f.get('name'), f.get('file_extension') + for f in blob["download_formats"]: + name, ext = f.get("name"), f.get("file_extension") if all(isinstance(x, compat_str) for x in (name, ext)): - download_formats[name] = ext.strip('.') + download_formats[name] = ext.strip(".") for format_id, f in downloads.items(): - format_url = f.get('url') + format_url = f.get("url") if not format_url: continue # Stat URL generation algorithm is reverse engineered from # download_*_bundle_*.js stat_url = update_url_query( - format_url.replace('/download/', '/statdownload/'), { - '.rand': int(time.time() * 1000 * random.random()), - }) - format_id = f.get('encoding_name') or format_id + format_url.replace("/download/", "/statdownload/"), + {".rand": int(time.time() * 1000 * random.random())}, + ) + format_id = f.get("encoding_name") or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, - transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], - fatal=False) + stat_url, + track_id, + "Downloading %s JSON" % format_id, + transform_source=lambda s: s[ + s.index("{") : s.rindex("}") + 1 + ], + fatal=False, + ) if not stat: continue - retry_url = url_or_none(stat.get('retry_url')) + retry_url = url_or_none(stat.get("retry_url")) if not retry_url: continue - formats.append({ - 'url': self._proto_relative_url(retry_url, 'http:'), - 'ext': download_formats.get(format_id), - 'format_id': format_id, - 'format_note': f.get('description'), - 'filesize': parse_filesize(f.get('size_mb')), - 
'vcodec': 'none', - }) + formats.append( + { + "url": self._proto_relative_url(retry_url, "http:"), + "ext": download_formats.get(format_id), + "format_id": format_id, + "format_note": f.get("description"), + "filesize": parse_filesize(f.get("size_mb")), + "vcodec": "none", + } + ) self._sort_formats(formats) - title = '%s - %s' % (artist, track) if artist else track + title = "%s - %s" % (artist, track) if artist else track if not duration: - duration = float_or_none(self._html_search_meta( - 'duration', webpage, default=None)) + duration = float_or_none( + self._html_search_meta("duration", webpage, default=None) + ) return { - 'id': track_id, - 'title': title, - 'thumbnail': thumbnail, - 'uploader': artist, - 'timestamp': timestamp, - 'release_date': release_date, - 'duration': duration, - 'track': track, - 'track_number': track_number, - 'track_id': track_id, - 'artist': artist, - 'album': album, - 'formats': formats, + "id": track_id, + "title": title, + "thumbnail": thumbnail, + "uploader": artist, + "timestamp": timestamp, + "release_date": release_date, + "duration": duration, + "track": track, + "track_number": track_number, + "track_id": track_id, + "artist": artist, + "album": album, + "formats": formats, } class BandcampAlbumIE(InfoExtractor): - IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' 
+ IE_NAME = "Bandcamp:album" + _VALID_URL = r"https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<album_id>[^/?#&]+)" - _TESTS = [{ - 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', - 'playlist': [ - { - 'md5': '39bc1eded3476e927c724321ddf116cf', - 'info_dict': { - 'id': '1353101989', - 'ext': 'mp3', - 'title': 'Intro', - } + _TESTS = [ + { + "url": "http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1", + "playlist": [ + { + "md5": "39bc1eded3476e927c724321ddf116cf", + "info_dict": {"id": "1353101989", "ext": "mp3", "title": "Intro",}, + }, + { + "md5": "1a2c32e2691474643e912cc6cd4bffaa", + "info_dict": { + "id": "38097443", + "ext": "mp3", + "title": "Kero One - Keep It Alive (Blazo remix)", + }, + }, + ], + "info_dict": { + "title": "Jazz Format Mixtape vol.1", + "id": "jazz-format-mixtape-vol-1", + "uploader_id": "blazo", }, - { - 'md5': '1a2c32e2691474643e912cc6cd4bffaa', - 'info_dict': { - 'id': '38097443', - 'ext': 'mp3', - 'title': 'Kero One - Keep It Alive (Blazo remix)', - } + "params": {"playlistend": 2}, + "skip": "Bandcamp imposes download limits.", + }, + { + "url": ( + "http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave" + ), + "info_dict": { + "title": "Hierophany of the Open Grave", + "uploader_id": "nightbringer", + "id": "hierophany-of-the-open-grave", }, - ], - 'info_dict': { - 'title': 'Jazz Format Mixtape vol.1', - 'id': 'jazz-format-mixtape-vol-1', - 'uploader_id': 'blazo', + "playlist_mincount": 9, }, - 'params': { - 'playlistend': 2 + { + # with escaped quote in title + "url": "https://jstrecords.bandcamp.com/album/entropy-ep", + "info_dict": { + "title": '"Entropy" EP', + "uploader_id": "jstrecords", + "id": "entropy-ep", + }, + "playlist_mincount": 3, }, - 'skip': 'Bandcamp imposes download limits.' 
- }, { - 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', - 'info_dict': { - 'title': 'Hierophany of the Open Grave', - 'uploader_id': 'nightbringer', - 'id': 'hierophany-of-the-open-grave', + { + # not all tracks have songs + "url": "https://insulters.bandcamp.com/album/we-are-the-plague", + "info_dict": { + "id": "we-are-the-plague", + "title": "WE ARE THE PLAGUE", + "uploader_id": "insulters", + }, + "playlist_count": 2, }, - 'playlist_mincount': 9, - }, { - 'url': 'http://dotscale.bandcamp.com', - 'info_dict': { - 'title': 'Loom', - 'id': 'dotscale', - 'uploader_id': 'dotscale', - }, - 'playlist_mincount': 7, - }, { - # with escaped quote in title - 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', - 'info_dict': { - 'title': '"Entropy" EP', - 'uploader_id': 'jstrecords', - 'id': 'entropy-ep', - }, - 'playlist_mincount': 3, - }, { - # not all tracks have songs - 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague', - 'info_dict': { - 'id': 'we-are-the-plague', - 'title': 'WE ARE THE PLAGUE', - 'uploader_id': 'insulters', - }, - 'playlist_count': 2, - }] + ] @classmethod def suitable(cls, url): - return (False - if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + return ( + False + if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) + else super(BandcampAlbumIE, cls).suitable(url) + ) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - uploader_id = mobj.group('subdomain') - album_id = mobj.group('album_id') + uploader_id = mobj.group("subdomain") + album_id = mobj.group("album_id") playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) track_elements = re.findall( - r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) + r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', + webpage, + ) if not track_elements: - raise 
ExtractorError('The page doesn\'t contain any tracks') + raise ExtractorError("The page doesn't contain any tracks") # Only tracks with duration info have songs entries = [ self.url_result( @@ -310,46 +336,51 @@ class BandcampAlbumIE(InfoExtractor): ie=BandcampIE.ie_key(), video_title=self._search_regex( r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', - elem_content, 'track title', fatal=False)) + elem_content, + "track title", + fatal=False, + ), + ) for elem_content, t_path in track_elements - if self._html_search_meta('duration', elem_content, default=None)] + if self._html_search_meta("duration", elem_content, default=None) + ] title = self._html_search_regex( - r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', - webpage, 'title', fatal=False) + r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', webpage, "title", fatal=False + ) if title: - title = title.replace(r'\"', '"') + title = title.replace(r"\"", '"') return { - '_type': 'playlist', - 'uploader_id': uploader_id, - 'id': playlist_id, - 'title': title, - 'entries': entries, + "_type": "playlist", + "uploader_id": uploader_id, + "id": playlist_id, + "title": title, + "entries": entries, } class BandcampWeeklyIE(InfoExtractor): - IE_NAME = 'Bandcamp:weekly' - _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://bandcamp.com/?show=224', - 'md5': 'b00df799c733cf7e0c567ed187dea0fd', - 'info_dict': { - 'id': '224', - 'ext': 'opus', - 'title': 'BC Weekly April 4th 2017 - Magic Moments', - 'description': 'md5:5d48150916e8e02d030623a48512c874', - 'duration': 5829.77, - 'release_date': '20170404', - 'series': 'Bandcamp Weekly', - 'episode': 'Magic Moments', - 'episode_number': 208, - 'episode_id': '224', - } - }, { - 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True - }] + IE_NAME = "Bandcamp:weekly" + _VALID_URL = r"https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)" + _TESTS = [ + { + "url": "https://bandcamp.com/?show=224", + 
"md5": "b00df799c733cf7e0c567ed187dea0fd", + "info_dict": { + "id": "224", + "ext": "opus", + "title": "BC Weekly April 4th 2017 - Magic Moments", + "description": "md5:5d48150916e8e02d030623a48512c874", + "duration": 5829.77, + "release_date": "20170404", + "series": "Bandcamp Weekly", + "episode": "Magic Moments", + "episode_number": 208, + "episode_id": "224", + }, + }, + {"url": "https://bandcamp.com/?blah/blah@&show=228", "only_matching": True}, + ] def _real_extract(self, url): video_id = self._match_id(url) @@ -357,18 +388,20 @@ class BandcampWeeklyIE(InfoExtractor): blob = self._parse_json( self._search_regex( - r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage, - 'blob', group='blob'), - video_id, transform_source=unescapeHTML) + r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage, "blob", group="blob" + ), + video_id, + transform_source=unescapeHTML, + ) - show = blob['bcw_show'] + show = blob["bcw_show"] # This is desired because any invalid show id redirects to `bandcamp.com` # which happens to expose the latest Bandcamp Weekly episode. 
- show_id = int_or_none(show.get('show_id')) or int_or_none(video_id) + show_id = int_or_none(show.get("show_id")) or int_or_none(video_id) formats = [] - for format_id, format_url in show['audio_stream'].items(): + for format_id, format_url in show["audio_stream"].items(): if not url_or_none(format_url): continue for known_ext in KNOWN_EXTENSIONS: @@ -377,41 +410,139 @@ class BandcampWeeklyIE(InfoExtractor): break else: ext = None - formats.append({ - 'format_id': format_id, - 'url': format_url, - 'ext': ext, - 'vcodec': 'none', - }) + formats.append( + { + "format_id": format_id, + "url": format_url, + "ext": ext, + "vcodec": "none", + } + ) self._sort_formats(formats) - title = show.get('audio_title') or 'Bandcamp Weekly' - subtitle = show.get('subtitle') + title = show.get("audio_title") or "Bandcamp Weekly" + subtitle = show.get("subtitle") if subtitle: - title += ' - %s' % subtitle + title += " - %s" % subtitle episode_number = None - seq = blob.get('bcw_seq') + seq = blob.get("bcw_seq") if seq and isinstance(seq, list): try: episode_number = next( - int_or_none(e.get('episode_number')) + int_or_none(e.get("episode_number")) for e in seq - if isinstance(e, dict) and int_or_none(e.get('id')) == show_id) + if isinstance(e, dict) and int_or_none(e.get("id")) == show_id + ) except StopIteration: pass return { - 'id': video_id, - 'title': title, - 'description': show.get('desc') or show.get('short_desc'), - 'duration': float_or_none(show.get('audio_duration')), - 'is_live': False, - 'release_date': unified_strdate(show.get('published_date')), - 'series': 'Bandcamp Weekly', - 'episode': show.get('subtitle'), - 'episode_number': episode_number, - 'episode_id': compat_str(video_id), - 'formats': formats + "id": video_id, + "title": title, + "description": show.get("desc") or show.get("short_desc"), + "duration": float_or_none(show.get("audio_duration")), + "is_live": False, + "release_date": unified_strdate(show.get("published_date")), + "series": "Bandcamp Weekly", 
+ "episode": show.get("subtitle"), + "episode_number": episode_number, + "episode_id": compat_str(video_id), + "formats": formats, + } + + +class BandcampUserIE(InfoExtractor): + IE_NAME = "Bandcamp:user" + _VALID_URL = r"https?://(?:(?P<id>[^.]+)\.)?bandcamp\.com" + + _TESTS = [ + { + "url": "https://adrianvonziegler.bandcamp.com", + "info_dict": { + "id": "adrianvonziegler", + "title": "Discography of adrianvonziegler", + }, + "playlist_mincount": 23, + }, + { + "url": "http://dotscale.bandcamp.com", + "info_dict": {"id": "dotscale", "title": "Discography of dotscale",}, + "playlist_count": 1, + }, + { + "url": "https://nightcallofficial.bandcamp.com", + "info_dict": { + "id": "nightcallofficial", + "title": "Discography of nightcallofficial", + }, + "playlist_count": 4, + }, + ] + + @classmethod + def suitable(cls, url): + return ( + False + if BandcampAlbumIE.suitable(url) + or BandcampIE.suitable(url) + or BandcampWeeklyIE.suitable(url) + else super(BandcampUserIE, cls).suitable(url) + ) + + def _real_extract(self, url): + uploader = self._match_id(url) + webpage = self._download_webpage(url, uploader) + entries = [] + + # Bandcamp User type 1 page + discography_data = re.findall( + r'<li data-item-id="([^"]+)[^>]+>\s*<a href="(/[^/]+/[^/"]+)">', + webpage, + re.MULTILINE, + ) + + if len(discography_data) > 0: + for match in discography_data: + element_id = match[0] + element_url = match[1] + if element_url.split("/")[1] == "album": + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + + entries.append( + self.url_result( + compat_urlparse.urljoin(url, element_url), + ie=ie, + video_id=element_id, + video_title=element_url.split("/")[2], + ) + ) + else: + # Bandcamp user type 2 page + discography_data = re.findall( + r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage + ) + + for element in discography_data: + if re.match("/album/+", element): + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + + entries.append( + 
self.url_result( + compat_urlparse.urljoin(url, element), + ie=ie, + video_title=element, + ) + ) + + return { + "_type": "playlist", + "id": uploader, + "title": "Discography of %s" % uploader, + "entries": entries, } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ae7079a6a..531a11f15 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -80,7 +80,12 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, @@ -1405,7 +1410,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE