mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-25 18:04:33 +01:00
* Rewrite extraction logic
* Fix errors
This commit is contained in:
parent
9cb8ca072d
commit
a1a0490928
@ -1,5 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@ -15,6 +16,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
|
RegexNotFoundError,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
@ -22,7 +24,6 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
RegexNotFoundError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -337,7 +338,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': 'Magic Moments',
|
'episode': 'Magic Moments',
|
||||||
'episode_number': 208,
|
'episode_number': 208,
|
||||||
'episode_id': '224',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
@ -405,36 +405,44 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': show.get('subtitle'),
|
'episode': show.get('subtitle'),
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
'episode_id': compat_str(video_id),
|
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampUserIE(InfoExtractor):
|
class BandcampUserIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:user'
|
IE_NAME = 'Bandcamp:user'
|
||||||
_VALID_URL = r'https?://(?:(?P<id>[^.]+)\.)?bandcamp\.com/?'
|
_VALID_URL = r'https?://(?:(?P<id>[^.]+)\.)?bandcamp\.com'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://adrianvonziegler.bandcamp.com',
|
'url': 'https://adrianvonziegler.bandcamp.com',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'adrianvonziegler',
|
'id': 'adrianvonziegler',
|
||||||
'title': 'Albums of adrianvonziegler',
|
'title': 'Discography of adrianvonziegler',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 22,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://dotscale.bandcamp.com',
|
'url': 'http://dotscale.bandcamp.com',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dotscale',
|
'id': 'dotscale',
|
||||||
'title': 'Albums of dotscale',
|
'title': 'Discography of dotscale',
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
}]
|
}, {
|
||||||
|
'url': 'https://nightcallofficial.bandcamp.com',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nightcallofficial',
|
||||||
|
'title': 'Discography of nightcallofficial',
|
||||||
|
},
|
||||||
|
'playlist_count': 4,
|
||||||
|
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return (False
|
return (False if BandcampAlbumIE.suitable(url)
|
||||||
if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or
|
or BandcampIE.suitable(url)
|
||||||
BandcampWeeklyIE.suitable(url)
|
or BandcampWeeklyIE.suitable(url)
|
||||||
else super(BandcampUserIE, cls).suitable(url))
|
else super(BandcampUserIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -442,21 +450,38 @@ class BandcampUserIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, uploader)
|
webpage = self._download_webpage(url, uploader)
|
||||||
|
|
||||||
album_elements = re.findall(r'<a href="/(album/.[^"]+)">', webpage)
|
# Bandcamp User type 1 page
|
||||||
|
try:
|
||||||
|
discography_data = json.loads(self._search_regex(
|
||||||
|
r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">',
|
||||||
|
webpage, 'raw_data').replace('&quot;', '"'))
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
compat_urlparse.urljoin(url, album_id),
|
compat_urlparse.urljoin(url, element['page_url']),
|
||||||
ie=BandcampAlbumIE.ie_key(),
|
ie=BandcampAlbumIE.ie_key(),
|
||||||
video_id='%s-%s' % (uploader, album_id),
|
video_id=element['id'],
|
||||||
video_title=album_id,
|
video_title=element['title'],
|
||||||
)
|
)
|
||||||
for album_id in album_elements
|
for element in discography_data
|
||||||
]
|
]
|
||||||
|
except RegexNotFoundError:
|
||||||
|
# Bandcamp user type 2 page
|
||||||
|
discography_data = re.findall(
|
||||||
|
r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result(
|
||||||
|
compat_urlparse.urljoin(url, element),
|
||||||
|
ie=BandcampAlbumIE.ie_key(),
|
||||||
|
video_title=element,
|
||||||
|
)
|
||||||
|
for element in discography_data
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': uploader,
|
'id': uploader,
|
||||||
'title': 'Albums of %s' % (uploader),
|
'title': 'Discography of %s' % uploader,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user