From 1782b39a08ab781e4dbb49bda5093270fb45ff98 Mon Sep 17 00:00:00 2001 From: Lyz Date: Thu, 7 Nov 2019 22:48:13 +0100 Subject: [PATCH 01/44] Merge master --- youtube_dl/extractor/bandcamp.py | 50 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 7 ++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index f14b407dc..32b0313bf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -22,6 +22,7 @@ from ..utils import ( unified_strdate, unified_timestamp, url_or_none, + RegexNotFoundError, ) @@ -415,3 +416,52 @@ class BandcampWeeklyIE(InfoExtractor): 'episode_id': compat_str(video_id), 'formats': formats } + + +class BandcampUserIE(InfoExtractor): + IE_NAME = 'Bandcamp:user' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' + + _TESTS = [{ + 'url': 'https://adrianvonziegler.bandcamp.com', + 'info_dict': { + 'id': 'adrianvonziegler', + 'title': 'Albums of adrianvonziegler', + }, + 'playlist_mincount': 20, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'id': 'dotscale', + 'title': 'Albums of dotscale', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader = mobj.group('subdomain') + + webpage = self._download_webpage(url, uploader) + + album_elements = re.findall(r'', webpage) + + if not album_elements: + raise ExtractorError('The page doesn\'t contain any albums') + + entries = [ + self.url_result( + compat_urlparse.urljoin(url, 'album/{}'.format(album_id)), + ie=BandcampAlbumIE.ie_key(), + video_id='{}-{}'.format(uploader, album_id), + video_title=album_id, + ) + for album_id in album_elements + ] + + return { + '_type': 'playlist', + 'id': uploader, + 'title': 'Albums of {}'.format(uploader), + 'entries': entries, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e407ab3d9..8ead72005 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -80,7 +80,12 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, From def281bfdd101fadaa1b03386f3fb9ade7a16172 Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 4 Mar 2019 11:41:41 +0100 Subject: [PATCH 02/44] Correct BandcampUserIE regexp --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 32b0313bf..12b079b0a 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -420,7 +420,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?$' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', From e3e622310db00314dda2ddfda7fea4a705929fd2 Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 4 Mar 2019 12:12:38 +0100 Subject: [PATCH 03/44] Fix BandcampAlbum _VALID_URL regexp --- youtube_dl/extractor/bandcamp.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 12b079b0a..35ccb4a56 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -221,7 +221,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/album/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -260,14 +260,6 @@ class BandcampAlbumIE(InfoExtractor): 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, - }, { - 'url': 'http://dotscale.bandcamp.com', - 'info_dict': { - 'title': 'Loom', - 'id': 'dotscale', - 'uploader_id': 'dotscale', - }, - 'playlist_mincount': 7, }, { # with escaped quote in title 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', From 2d77ca611e244b54848592d19c289a7b4246e91a Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 12:53:20 +0100 Subject: [PATCH 04/44] Remove trailing $ in VALID_URL --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 35ccb4a56..d9e9537cf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -412,7 +412,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?$' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', From 10db6fba80e8d91c2d380c56f528e1661569b7ed Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 12:59:58 +0100 Subject: [PATCH 05/44] Switch from .format to % --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d9e9537cf..422a2867f 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -443,7 +443,7 @@ class BandcampUserIE(InfoExtractor): entries = [ self.url_result( - compat_urlparse.urljoin(url, 'album/{}'.format(album_id)), + compat_urlparse.urljoin(url, 'album/%s' % album_id), ie=BandcampAlbumIE.ie_key(), video_id='{}-{}'.format(uploader, album_id), video_title=album_id, From e7bbe73f9dcd3472d042ca1af138532af99654af Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:05:35 +0100 Subject: [PATCH 06/44] Capture non-greedy the album elements --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 422a2867f..0906ec8f5 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -436,7 +436,7 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + album_elements = re.findall(r'', webpage) if not album_elements: raise ExtractorError('The page doesn\'t contain any albums') From 09066bec550913fd52e994fe99d079013580d993 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:08:03 +0100 Subject: [PATCH 07/44] Added _match_id --- youtube_dl/extractor/bandcamp.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 0906ec8f5..9e299b7bf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -412,7 +412,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', @@ -431,8 +431,7 @@ class BandcampUserIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group('subdomain') + uploader = self._match_id(url) webpage = self._download_webpage(url, uploader) From 39506ecd2cbfc4184c0b5c7c279546b635b87cde Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:08:35 +0100 Subject: [PATCH 08/44] Removed test if no albums exist --- youtube_dl/extractor/bandcamp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9e299b7bf..fbe834f91 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -437,9 +437,6 @@ class BandcampUserIE(InfoExtractor): album_elements = re.findall(r'', webpage) - if not album_elements: - raise ExtractorError('The page doesn\'t contain any albums') - entries = [ self.url_result( compat_urlparse.urljoin(url, 'album/%s' % album_id), From dcec01dc129478d3b5089cbbaa8b1fd7cd812c2d Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:18:01 +0100 Subject: [PATCH 09/44] Add suitable method for BandcampUser --- youtube_dl/extractor/bandcamp.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index fbe834f91..d0415267c 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -430,6 +430,12 @@ class BandcampUserIE(InfoExtractor): 'playlist_count': 1, }] + @classmethod + def suitable(cls, url): + return (False + if BandcampAlbumIE.suitable(url) + else super(BandcampUserIE, cls).suitable(url)) + def _real_extract(self, url): uploader = self._match_id(url) From 6c73c2b14508615a5f7a6193d375ee9652664651 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:30:05 +0100 Subject: [PATCH 10/44] Add BandcampWeeklyIE and BandcampIE to BandcampUser suitable method --- youtube_dl/extractor/bandcamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d0415267c..016294136 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -433,7 +433,8 @@ class BandcampUserIE(InfoExtractor): @classmethod def suitable(cls, url): return (False - if BandcampAlbumIE.suitable(url) + if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or + BandcampWeeklyIE.suitable(url) else super(BandcampUserIE, cls).suitable(url)) def _real_extract(self, url): From dcace8017d9578651f1b3a497f3634818aea5e80 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 16:52:25 +0100 Subject: [PATCH 11/44] Captured album element with album --- youtube_dl/extractor/bandcamp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 016294136..91a2b1eaf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -442,11 +442,11 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + album_elements = re.findall(r'', webpage) entries = [ self.url_result( - compat_urlparse.urljoin(url, 'album/%s' % album_id), + compat_urlparse.urljoin(url, album_id), ie=BandcampAlbumIE.ie_key(), video_id='{}-{}'.format(uploader, album_id), video_title=album_id, From 055b42542ccc210d95715b84f800b87be4a29bfb Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 18 Mar 2019 11:27:48 +0100 Subject: [PATCH 12/44] Switched from format to % --- youtube_dl/extractor/bandcamp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 91a2b1eaf..1d131f16d 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -448,7 +448,7 @@ class BandcampUserIE(InfoExtractor): self.url_result( compat_urlparse.urljoin(url, album_id), ie=BandcampAlbumIE.ie_key(), - video_id='{}-{}'.format(uploader, album_id), + video_id='%s-%s' % (uploader, album_id), video_title=album_id, ) for album_id in album_elements @@ -457,6 +457,6 @@ class BandcampUserIE(InfoExtractor): return { '_type': 'playlist', 'id': uploader, - 'title': 'Albums of {}'.format(uploader), + 'title': 'Albums of %s' % (uploader), 'entries': entries, } From faf074154ad0215493b098014c3edefcb251717b Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:09:27 +0200 Subject: [PATCH 13/44] * Rewrite extraction logic * Fix errors --- youtube_dl/extractor/bandcamp.py | 69 ++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 1d131f16d..0ea245c03 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import random import re import time @@ -15,6 +16,7 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, parse_filesize, + RegexNotFoundError, str_or_none, try_get, unescapeHTML, @@ -22,7 +24,6 @@ from ..utils import ( unified_strdate, unified_timestamp, url_or_none, - RegexNotFoundError, ) @@ -337,7 +338,6 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': 'Magic Moments', 'episode_number': 208, - 'episode_id': '224', } }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', @@ -405,36 +405,44 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_number': episode_number, - 'episode_id': compat_str(video_id), 'formats': formats } class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', 'info_dict': { 'id': 'adrianvonziegler', - 'title': 'Albums of adrianvonziegler', + 'title': 'Discography of adrianvonziegler', }, - 'playlist_mincount': 20, + 'playlist_mincount': 22, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Albums of dotscale', + 'title': 'Discography of dotscale', }, 'playlist_count': 1, - }] + }, { + 'url': 'https://nightcallofficial.bandcamp.com', + 'info_dict': { + 'id': 'nightcallofficial', + 'title': 'Discography of nightcallofficial', + }, + 'playlist_count': 4, + + }, + ] @classmethod def suitable(cls, url): - return (False - if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or - BandcampWeeklyIE.suitable(url) + return (False if BandcampAlbumIE.suitable(url) + or BandcampIE.suitable(url) + or BandcampWeeklyIE.suitable(url) else super(BandcampUserIE, cls).suitable(url)) def _real_extract(self, url): @@ -442,21 +450,38 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + # Bandcamp User type 1 page + try: + discography_data = json.loads(self._search_regex( + r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', + webpage, 'raw_data').replace('"', '"')) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, album_id), - ie=BandcampAlbumIE.ie_key(), - video_id='%s-%s' % (uploader, album_id), - video_title=album_id, - ) - for album_id in album_elements - ] + entries = [ + self.url_result( + compat_urlparse.urljoin(url, element['page_url']), + ie=BandcampAlbumIE.ie_key(), + video_id=element['id'], + video_title=element['title'], + ) + for element in discography_data + ] + except RegexNotFoundError: + # Bandcamp user type 2 page + discography_data = re.findall( + r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) + + entries = [ + self.url_result( + compat_urlparse.urljoin(url, element), + ie=BandcampAlbumIE.ie_key(), + video_title=element, + ) + for element in discography_data + ] return { '_type': 'playlist', 'id': uploader, - 'title': 'Albums of %s' % (uploader), + 'title': 'Discography of %s' % uploader, 'entries': entries, } From 895f5e8b048330729f0270dd255e30f14a5d7955 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:35:27 +0200 Subject: [PATCH 14/44] Added ie for tracks --- youtube_dl/extractor/bandcamp.py | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 0ea245c03..d488c8622 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -447,8 +447,8 @@ class BandcampUserIE(InfoExtractor): def _real_extract(self, url): uploader = self._match_id(url) - webpage = self._download_webpage(url, uploader) + entries = [] # Bandcamp User type 1 page try: @@ -456,29 +456,32 @@ class BandcampUserIE(InfoExtractor): r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', webpage, 'raw_data').replace('"', '"')) - entries = [ - self.url_result( + for element in discography_data: + if element['type'] == 'album': + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + + entries.append(self.url_result( compat_urlparse.urljoin(url, element['page_url']), - ie=BandcampAlbumIE.ie_key(), + ie=ie, video_id=element['id'], - video_title=element['title'], - ) - for element in discography_data - ] + video_title=element['title'])) except RegexNotFoundError: # Bandcamp user type 2 page discography_data = re.findall( r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, element), - ie=BandcampAlbumIE.ie_key(), - video_title=element, - ) - for element in discography_data - ] + for element in discography_data: + if re.match('album', element): + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + entries.append(self.url_result( + compat_urlparse.urljoin(url, element), + ie=ie, + video_title=element)) return { '_type': 'playlist', 'id': uploader, From 61c6d221e98c8cd10aa067762cb0dd4473da2b49 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:40:45 +0200 Subject: [PATCH 15/44] Fix regular expression --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d488c8622..ba4440c57 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -473,7 +473,7 @@ class BandcampUserIE(InfoExtractor): r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) for element in discography_data: - if re.match('album', element): + if re.match('/album/+', element): ie = BandcampAlbumIE.ie_key() else: ie = BandcampIE.ie_key() From 9e4764faac7ec232d3ac7f2bc968d17fce348060 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 18:03:12 +0200 Subject: [PATCH 16/44] Convert video_id from int to string --- youtube_dl/extractor/bandcamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index ba4440c57..ddbd1179b 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -465,7 +465,7 @@ class BandcampUserIE(InfoExtractor): entries.append(self.url_result( compat_urlparse.urljoin(url, element['page_url']), ie=ie, - video_id=element['id'], + video_id=str(element['id']), video_title=element['title'])) except RegexNotFoundError: # Bandcamp user type 2 page @@ -482,6 +482,7 @@ class BandcampUserIE(InfoExtractor): compat_urlparse.urljoin(url, element), ie=ie, video_title=element)) + return { '_type': 'playlist', 'id': uploader, From d897244f051c762899c95177f1094a779dc637ee Mon Sep 17 00:00:00 2001 From: Lyz Date: Thu, 7 Nov 2019 22:42:13 +0100 Subject: [PATCH 17/44] trigger CI --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index ddbd1179b..9fe107e2c 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -40,7 +40,7 @@ class BandcampIE(InfoExtractor): }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { - # free download + # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'md5': '853e35bf34aa1d6fe2615ae612564b36', 'info_dict': { From 5cdb5b2842f468ad54b925d491b3c96e53387adb Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 20:46:15 +0200 Subject: [PATCH 18/44] Update BandcampUser extractor to match the webpage changes --- youtube_dl/extractor/bandcamp.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9fe107e2c..4801cd319 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -419,7 +419,7 @@ class BandcampUserIE(InfoExtractor): 'id': 'adrianvonziegler', 'title': 'Discography of adrianvonziegler', }, - 'playlist_mincount': 22, + 'playlist_mincount': 23, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { @@ -451,23 +451,25 @@ class BandcampUserIE(InfoExtractor): entries = [] # Bandcamp User type 1 page - try: - discography_data = json.loads(self._search_regex( - r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', - webpage, 'raw_data').replace('"', '"')) + discography_data = re.findall( + r'
  • ]+>\s*', + webpage, re.MULTILINE) - for element in discography_data: - if element['type'] == 'album': + if len(discography_data) > 0: + for match in discography_data: + element_id = match[0] + element_url = match[1] + if element_url.split('/')[1] == 'album': ie = BandcampAlbumIE.ie_key() else: ie = BandcampIE.ie_key() entries.append(self.url_result( - compat_urlparse.urljoin(url, element['page_url']), + compat_urlparse.urljoin(url, element_url), ie=ie, - video_id=str(element['id']), - video_title=element['title'])) - except RegexNotFoundError: + video_id=element_id, + video_title=element_url.split('/')[2])) + else: # Bandcamp user type 2 page discography_data = re.findall( r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) From 6541969d2517b8f6b4334099da8007605118ef62 Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 21:27:53 +0200 Subject: [PATCH 19/44] Fix merge request changes --- youtube_dl/extractor/bandcamp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 4801cd319..773ebe283 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import random import re import time @@ -16,7 +15,6 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, parse_filesize, - RegexNotFoundError, str_or_none, try_get, unescapeHTML, @@ -40,7 +38,7 @@ class BandcampIE(InfoExtractor): }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { - # free download + # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'md5': '853e35bf34aa1d6fe2615ae612564b36', 'info_dict': { @@ -338,6 +336,7 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': 'Magic Moments', 'episode_number': 208, + 'episode_id': '224', } }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', @@ -404,6 +403,7 @@ class BandcampWeeklyIE(InfoExtractor): 'release_date': unified_strdate(show.get('published_date')), 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), + 'episode_id': compat_str(video_id), 'episode_number': episode_number, 'formats': formats } From 858f0386f20e684984bc806d1cd4b390ffbb0c03 Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 21:28:53 +0200 Subject: [PATCH 20/44] Change episode_id possition --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 773ebe283..102339334 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -403,8 +403,8 @@ class BandcampWeeklyIE(InfoExtractor): 'release_date': unified_strdate(show.get('published_date')), 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), - 'episode_id': compat_str(video_id), 'episode_number': episode_number, + 'episode_id': compat_str(video_id), 'formats': formats } From 9c1f99402fa25a5a691944c133432741af19829b Mon Sep 17 00:00:00 2001 From: Gilles Pietri Date: Wed, 23 Sep 2020 23:09:00 +0200 Subject: [PATCH 21/44] [bandcamp] fix regexp for JSON matching on bandcamp --- youtube_dl/extractor/bandcamp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index f14b407dc..ad1812320 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -91,10 +91,11 @@ class BandcampIE(InfoExtractor): duration = None formats = [] - track_info = self._parse_json( - self._search_regex( - r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n', - webpage, 'track info', default='{}'), title) + trackinfo_block = self._search_regex( + r'trackinfo":\[\s*({.+?})\s*\],"', + webpage, 'track info', default='{}') + quoted_json = trackinfo_block.replace('"', '"') + track_info = self._parse_json(quoted_json, title) if track_info: file_ = track_info.get('file') if isinstance(file_, dict): @@ -117,7 +118,7 @@ class BandcampIE(InfoExtractor): def extract(key): return self._search_regex( - r'\b%s\s*["\']?\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % key, + r',"%s":(")(?P(?:(?!").)+)"' % key, webpage, key, default=None, group='value') artist = extract('artist') From d7a19b8e5446d75f792e5307799331110a09d1ec Mon Sep 17 00:00:00 2001 From: Lyz Date: Thu, 7 Nov 2019 22:48:13 +0100 Subject: [PATCH 22/44] Merge master --- youtube_dl/extractor/bandcamp.py | 50 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 7 ++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index f14b407dc..32b0313bf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -22,6 +22,7 @@ from ..utils import ( unified_strdate, unified_timestamp, url_or_none, + RegexNotFoundError, ) @@ -415,3 +416,52 @@ class BandcampWeeklyIE(InfoExtractor): 'episode_id': compat_str(video_id), 'formats': formats } + + +class BandcampUserIE(InfoExtractor): + IE_NAME = 'Bandcamp:user' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' + + _TESTS = [{ + 'url': 'https://adrianvonziegler.bandcamp.com', + 'info_dict': { + 'id': 'adrianvonziegler', + 'title': 'Albums of adrianvonziegler', + }, + 'playlist_mincount': 20, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'id': 'dotscale', + 'title': 'Albums of dotscale', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader = mobj.group('subdomain') + + webpage = self._download_webpage(url, uploader) + + album_elements = re.findall(r'', webpage) + + if not album_elements: + raise ExtractorError('The page doesn\'t contain any albums') + + entries = [ + self.url_result( + compat_urlparse.urljoin(url, 'album/{}'.format(album_id)), + ie=BandcampAlbumIE.ie_key(), + video_id='{}-{}'.format(uploader, album_id), + video_title=album_id, + ) + for album_id in album_elements + ] + + return { + '_type': 'playlist', + 'id': uploader, + 'title': 'Albums of {}'.format(uploader), + 'entries': entries, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ae7079a6a..f106f0d02 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -80,7 +80,12 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, From db204d22d20ec1534f0b34cbcab133a97dd5762d Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 4 Mar 2019 11:41:41 +0100 Subject: [PATCH 23/44] Correct BandcampUserIE regexp --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 32b0313bf..12b079b0a 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -420,7 +420,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?$' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', From 65ba90f65c58dcab650012a029cbdfaed65cf395 Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 4 Mar 2019 12:12:38 +0100 Subject: [PATCH 24/44] Fix BandcampAlbum _VALID_URL regexp --- youtube_dl/extractor/bandcamp.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 12b079b0a..35ccb4a56 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -221,7 +221,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/album/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -260,14 +260,6 @@ class BandcampAlbumIE(InfoExtractor): 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, - }, { - 'url': 'http://dotscale.bandcamp.com', - 'info_dict': { - 'title': 'Loom', - 'id': 'dotscale', - 'uploader_id': 'dotscale', - }, - 'playlist_mincount': 7, }, { # with escaped quote in title 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', From 3d39e15e96b0752013b1d4e65163e660080fae4b Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 12:53:20 +0100 Subject: [PATCH 25/44] Remove trailing $ in VALID_URL --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 35ccb4a56..d9e9537cf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -412,7 +412,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?$' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', From e0a9e262eefb5e52485c4bcc1e0edd4f6e0feefc Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 12:59:58 +0100 Subject: [PATCH 26/44] Switch from .format to % --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d9e9537cf..422a2867f 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -443,7 +443,7 @@ class BandcampUserIE(InfoExtractor): entries = [ self.url_result( - compat_urlparse.urljoin(url, 'album/{}'.format(album_id)), + compat_urlparse.urljoin(url, 'album/%s' % album_id), ie=BandcampAlbumIE.ie_key(), video_id='{}-{}'.format(uploader, album_id), video_title=album_id, From 16efa6ebccb8d973a778e644bc232be69a0ebf60 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:05:35 +0100 Subject: [PATCH 27/44] Capture non-greedy the album elements --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 422a2867f..0906ec8f5 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -436,7 +436,7 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + album_elements = re.findall(r'', webpage) if not album_elements: raise ExtractorError('The page doesn\'t contain any albums') From 57f736fbf6aa24728c4d9732f25ec4d53b645fc6 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:08:03 +0100 Subject: [PATCH 28/44] Added _match_id --- youtube_dl/extractor/bandcamp.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 0906ec8f5..9e299b7bf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -412,7 +412,7 @@ class BandcampWeeklyIE(InfoExtractor): class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', @@ -431,8 +431,7 @@ class BandcampUserIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group('subdomain') + uploader = self._match_id(url) webpage = self._download_webpage(url, uploader) From d0a1cb3ae65ed104056f0411b7db418d1e9a2f13 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:08:35 +0100 Subject: [PATCH 29/44] Removed test if no albums exist --- youtube_dl/extractor/bandcamp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9e299b7bf..fbe834f91 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -437,9 +437,6 @@ class BandcampUserIE(InfoExtractor): album_elements = re.findall(r'', webpage) - if not album_elements: - raise ExtractorError('The page doesn\'t contain any albums') - entries = [ self.url_result( compat_urlparse.urljoin(url, 'album/%s' % album_id), From 7d1605bc1278abb289e52d4112080df505326b78 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:18:01 +0100 Subject: [PATCH 30/44] Add suitable method for BandcampUser --- youtube_dl/extractor/bandcamp.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index fbe834f91..d0415267c 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -430,6 +430,12 @@ class BandcampUserIE(InfoExtractor): 'playlist_count': 1, }] + @classmethod + def suitable(cls, url): + return (False + if BandcampAlbumIE.suitable(url) + else super(BandcampUserIE, cls).suitable(url)) + def _real_extract(self, url): uploader = self._match_id(url) From 6f12f2a34b0b9a227579ad4114587ddedd1a2e6c Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:30:05 +0100 Subject: [PATCH 31/44] Add BandcampWeeklyIE and BandcampIE to BandcampUser suitable method --- youtube_dl/extractor/bandcamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d0415267c..016294136 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -433,7 +433,8 @@ class BandcampUserIE(InfoExtractor): @classmethod def suitable(cls, url): return (False - if BandcampAlbumIE.suitable(url) + if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or + BandcampWeeklyIE.suitable(url) else super(BandcampUserIE, cls).suitable(url)) def _real_extract(self, url): From 65302cebf1483e91a13a41535740be8d13c4b694 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 16:52:25 +0100 Subject: [PATCH 32/44] Captured album element with album --- youtube_dl/extractor/bandcamp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 016294136..91a2b1eaf 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -442,11 +442,11 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + album_elements = re.findall(r'', webpage) entries = [ self.url_result( - compat_urlparse.urljoin(url, 'album/%s' % album_id), + compat_urlparse.urljoin(url, album_id), ie=BandcampAlbumIE.ie_key(), video_id='{}-{}'.format(uploader, album_id), video_title=album_id, From 9cb8ca072dd97933313828a44d906fef37381391 Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 18 Mar 2019 11:27:48 +0100 Subject: [PATCH 33/44] Switched from format to % --- youtube_dl/extractor/bandcamp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 91a2b1eaf..1d131f16d 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -448,7 +448,7 @@ class BandcampUserIE(InfoExtractor): self.url_result( compat_urlparse.urljoin(url, album_id), ie=BandcampAlbumIE.ie_key(), - video_id='{}-{}'.format(uploader, album_id), + video_id='%s-%s' % (uploader, album_id), video_title=album_id, ) for album_id in album_elements @@ -457,6 +457,6 @@ class BandcampUserIE(InfoExtractor): return { '_type': 'playlist', 'id': uploader, - 'title': 'Albums of {}'.format(uploader), + 'title': 'Albums of %s' % (uploader), 'entries': entries, } From a1a0490928e04f0378883f4e169263bfa13a51fc Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:09:27 +0200 Subject: [PATCH 34/44] * Rewrite extraction logic * Fix errors --- youtube_dl/extractor/bandcamp.py | 69 ++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 1d131f16d..0ea245c03 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import random import re import time @@ -15,6 +16,7 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, parse_filesize, + RegexNotFoundError, str_or_none, try_get, unescapeHTML, @@ -22,7 +24,6 @@ from ..utils import ( unified_strdate, unified_timestamp, url_or_none, - RegexNotFoundError, ) @@ -337,7 +338,6 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': 'Magic Moments', 'episode_number': 208, - 'episode_id': '224', } }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', @@ -405,36 +405,44 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_number': episode_number, - 'episode_id': compat_str(video_id), 'formats': formats } class BandcampUserIE(InfoExtractor): IE_NAME = 'Bandcamp:user' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/?' + _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com' _TESTS = [{ 'url': 'https://adrianvonziegler.bandcamp.com', 'info_dict': { 'id': 'adrianvonziegler', - 'title': 'Albums of adrianvonziegler', + 'title': 'Discography of adrianvonziegler', }, - 'playlist_mincount': 20, + 'playlist_mincount': 22, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Albums of dotscale', + 'title': 'Discography of dotscale', }, 'playlist_count': 1, - }] + }, { + 'url': 'https://nightcallofficial.bandcamp.com', + 'info_dict': { + 'id': 'nightcallofficial', + 'title': 'Discography of nightcallofficial', + }, + 'playlist_count': 4, + + }, + ] @classmethod def suitable(cls, url): - return (False - if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or - BandcampWeeklyIE.suitable(url) + return (False if BandcampAlbumIE.suitable(url) + or BandcampIE.suitable(url) + or BandcampWeeklyIE.suitable(url) else super(BandcampUserIE, cls).suitable(url)) def _real_extract(self, url): @@ -442,21 +450,38 @@ class BandcampUserIE(InfoExtractor): webpage = self._download_webpage(url, uploader) - album_elements = re.findall(r'', webpage) + # Bandcamp User type 1 page + try: + discography_data = json.loads(self._search_regex( + r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', + webpage, 'raw_data').replace('"', '"')) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, album_id), - ie=BandcampAlbumIE.ie_key(), - video_id='%s-%s' % (uploader, album_id), - video_title=album_id, - ) - for album_id in album_elements - ] + entries = [ + self.url_result( + compat_urlparse.urljoin(url, element['page_url']), + ie=BandcampAlbumIE.ie_key(), + video_id=element['id'], + video_title=element['title'], + ) + for element in discography_data + ] + except RegexNotFoundError: + # Bandcamp user type 2 page + discography_data = re.findall( + r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) + + entries = [ + self.url_result( + compat_urlparse.urljoin(url, element), + ie=BandcampAlbumIE.ie_key(), + video_title=element, + ) + for element in discography_data + ] return { '_type': 'playlist', 'id': uploader, - 'title': 'Albums of %s' % (uploader), + 'title': 'Discography of %s' % uploader, 'entries': entries, } From f69899d5764cb9d88e8da456aa5ec8b10e7a0edc Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:35:27 +0200 Subject: [PATCH 35/44] Added ie for tracks --- youtube_dl/extractor/bandcamp.py | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 0ea245c03..d488c8622 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -447,8 +447,8 @@ class BandcampUserIE(InfoExtractor): def _real_extract(self, url): uploader = self._match_id(url) - webpage = self._download_webpage(url, uploader) + entries = [] # Bandcamp User type 1 page try: @@ -456,29 +456,32 @@ class BandcampUserIE(InfoExtractor): r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', webpage, 'raw_data').replace('"', '"')) - entries = [ - self.url_result( + for element in discography_data: + if element['type'] == 'album': + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + + entries.append(self.url_result( compat_urlparse.urljoin(url, element['page_url']), - ie=BandcampAlbumIE.ie_key(), + ie=ie, video_id=element['id'], - video_title=element['title'], - ) - for element in discography_data - ] + video_title=element['title'])) except RegexNotFoundError: # Bandcamp user type 2 page discography_data = re.findall( r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) - entries = [ - self.url_result( - compat_urlparse.urljoin(url, element), - ie=BandcampAlbumIE.ie_key(), - video_title=element, - ) - for element in discography_data - ] + for element in discography_data: + if re.match('album', element): + ie = BandcampAlbumIE.ie_key() + else: + ie = BandcampIE.ie_key() + entries.append(self.url_result( + compat_urlparse.urljoin(url, element), + ie=ie, + video_title=element)) return { '_type': 'playlist', 'id': uploader, From 886277479540a9a9790d8b85a6481203405d4bc2 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 17:40:45 +0200 Subject: [PATCH 36/44] Fix regular expression --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index d488c8622..ba4440c57 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -473,7 +473,7 @@ class BandcampUserIE(InfoExtractor): r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) for element in discography_data: - if re.match('album', element): + if re.match('/album/+', element): ie = BandcampAlbumIE.ie_key() else: ie = BandcampIE.ie_key() From 70499a274f854733a88a1771f168454fe13d6219 Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 18:03:12 +0200 Subject: [PATCH 37/44] Convert video_id from int to string --- youtube_dl/extractor/bandcamp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index ba4440c57..ddbd1179b 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -465,7 +465,7 @@ class BandcampUserIE(InfoExtractor): entries.append(self.url_result( compat_urlparse.urljoin(url, element['page_url']), ie=ie, - video_id=element['id'], + video_id=str(element['id']), video_title=element['title'])) except RegexNotFoundError: # Bandcamp user type 2 page @@ -482,6 +482,7 @@ class BandcampUserIE(InfoExtractor): compat_urlparse.urljoin(url, element), ie=ie, video_title=element)) + return { '_type': 'playlist', 'id': uploader, From f5afc50897b18a1b9f14cbc5889ae7d404c86efa Mon Sep 17 00:00:00 2001 From: Lyz Date: Thu, 7 Nov 2019 22:42:13 +0100 Subject: [PATCH 38/44] trigger CI --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index ddbd1179b..9fe107e2c 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -40,7 +40,7 @@ class BandcampIE(InfoExtractor): }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { - # free download + # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'md5': '853e35bf34aa1d6fe2615ae612564b36', 'info_dict': { From 348635d5275a444a2205e5e1667cb5d0a5387349 Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 20:46:15 +0200 Subject: [PATCH 39/44] Update BandcampUser extractor to match the webpage changes --- youtube_dl/extractor/bandcamp.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9fe107e2c..4801cd319 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -419,7 +419,7 @@ class BandcampUserIE(InfoExtractor): 'id': 'adrianvonziegler', 'title': 'Discography of adrianvonziegler', }, - 'playlist_mincount': 22, + 'playlist_mincount': 23, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { @@ -451,23 +451,25 @@ class BandcampUserIE(InfoExtractor): entries = [] # Bandcamp User type 1 page - try: - discography_data = json.loads(self._search_regex( - r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">', - webpage, 'raw_data').replace('"', '"')) + discography_data = re.findall( + r'
  • ]+>\s*', + webpage, re.MULTILINE) - for element in discography_data: - if element['type'] == 'album': + if len(discography_data) > 0: + for match in discography_data: + element_id = match[0] + element_url = match[1] + if element_url.split('/')[1] == 'album': ie = BandcampAlbumIE.ie_key() else: ie = BandcampIE.ie_key() entries.append(self.url_result( - compat_urlparse.urljoin(url, element['page_url']), + compat_urlparse.urljoin(url, element_url), ie=ie, - video_id=str(element['id']), - video_title=element['title'])) - except RegexNotFoundError: + video_id=element_id, + video_title=element_url.split('/')[2])) + else: # Bandcamp user type 2 page discography_data = re.findall( r']+trackTitle["\'][^"\']+["\']([^"\']+)', webpage) From fbeb7859fecd071bd3a0ca7ee03b6ea3a1c5e2e2 Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 21:27:53 +0200 Subject: [PATCH 40/44] Fix merge request changes --- youtube_dl/extractor/bandcamp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 4801cd319..773ebe283 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import random import re import time @@ -16,7 +15,6 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, parse_filesize, - RegexNotFoundError, str_or_none, try_get, unescapeHTML, @@ -40,7 +38,7 @@ class BandcampIE(InfoExtractor): }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { - # free download + # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'md5': '853e35bf34aa1d6fe2615ae612564b36', 'info_dict': { @@ -338,6 +336,7 @@ class BandcampWeeklyIE(InfoExtractor): 'series': 'Bandcamp Weekly', 'episode': 'Magic Moments', 'episode_number': 208, + 'episode_id': '224', } }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', @@ -404,6 +403,7 @@ class BandcampWeeklyIE(InfoExtractor): 'release_date': unified_strdate(show.get('published_date')), 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), + 'episode_id': compat_str(video_id), 'episode_number': episode_number, 'formats': formats } From 8a01c503abdf12de1b35eda1db7caabc45baac8c Mon Sep 17 00:00:00 2001 From: Lyz Date: Tue, 7 Apr 2020 21:28:53 +0200 Subject: [PATCH 41/44] Change episode_id possition --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 773ebe283..102339334 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -403,8 +403,8 @@ class BandcampWeeklyIE(InfoExtractor): 'release_date': unified_strdate(show.get('published_date')), 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), - 'episode_id': compat_str(video_id), 'episode_number': episode_number, + 'episode_id': compat_str(video_id), 'formats': formats } From f43cf3a3d31af98eb6ec68501b2e4e4e7a5934af Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 8 Mar 2019 13:03:44 +0100 Subject: [PATCH 42/44] Revert trailing whitespace --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f106f0d02..531a11f15 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1410,7 +1410,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE From 85e170e542128840fd8f104de2bfbc1bdc15d38a Mon Sep 17 00:00:00 2001 From: Lyz Date: Fri, 25 Oct 2019 14:17:47 +0200 Subject: [PATCH 43/44] WIP: update mr --- youtube_dl/extractor/bandcamp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 102339334..2a4291715 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -115,12 +115,15 @@ class BandcampIE(InfoExtractor): track_number = int_or_none(track_info.get('track_num')) duration = float_or_none(track_info.get('duration')) + # r'\b%s\s*["\']?\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % key, def extract(key): return self._search_regex( - r'\b%s\s*["\']?\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % key, + r'\b%s\s*["\']?\s*:\s*(["\'])(?P.+)\1' % key, webpage, key, default=None, group='value') + import pdb; pdb.set_trace() # XXX BREAKPOINT artist = extract('artist') + album = extract('album_title') timestamp = unified_timestamp( extract('publish_date') or extract('album_publish_date')) From 37c5f6b32cbaf694172352871cb7dbb030c37ce4 Mon Sep 17 00:00:00 2001 From: Lyz Date: Mon, 25 May 2020 10:36:32 +0200 Subject: [PATCH 44/44] remove debug tracing --- youtube_dl/extractor/bandcamp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 2a4291715..9ea34aa3b 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -121,7 +121,6 @@ class BandcampIE(InfoExtractor): r'\b%s\s*["\']?\s*:\s*(["\'])(?P.+)\1' % key, webpage, key, default=None, group='value') - import pdb; pdb.set_trace() # XXX BREAKPOINT artist = extract('artist') album = extract('album_title')