1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-25 18:04:33 +01:00

* Rewrite extraction logic

* Fix errors
This commit is contained in:
Lyz 2019-10-25 17:09:27 +02:00
parent 9cb8ca072d
commit a1a0490928
No known key found for this signature in database
GPG Key ID: 6C7D7C1612CDE02F

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import random import random
import re import re
import time import time
@ -15,6 +16,7 @@ from ..utils import (
int_or_none, int_or_none,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
parse_filesize, parse_filesize,
RegexNotFoundError,
str_or_none, str_or_none,
try_get, try_get,
unescapeHTML, unescapeHTML,
@ -22,7 +24,6 @@ from ..utils import (
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
RegexNotFoundError,
) )
@ -337,7 +338,6 @@ class BandcampWeeklyIE(InfoExtractor):
'series': 'Bandcamp Weekly', 'series': 'Bandcamp Weekly',
'episode': 'Magic Moments', 'episode': 'Magic Moments',
'episode_number': 208, 'episode_number': 208,
'episode_id': '224',
} }
}, { }, {
'url': 'https://bandcamp.com/?blah/blah@&show=228', 'url': 'https://bandcamp.com/?blah/blah@&show=228',
@ -405,36 +405,44 @@ class BandcampWeeklyIE(InfoExtractor):
'series': 'Bandcamp Weekly', 'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'), 'episode': show.get('subtitle'),
'episode_number': episode_number, 'episode_number': episode_number,
'episode_id': compat_str(video_id),
'formats': formats 'formats': formats
} }
class BandcampUserIE(InfoExtractor): class BandcampUserIE(InfoExtractor):
IE_NAME = 'Bandcamp:user' IE_NAME = 'Bandcamp:user'
_VALID_URL = r'https?://(?:(?P<id>[^.]+)\.)?bandcamp\.com/?' _VALID_URL = r'https?://(?:(?P<id>[^.]+)\.)?bandcamp\.com'
_TESTS = [{ _TESTS = [{
'url': 'https://adrianvonziegler.bandcamp.com', 'url': 'https://adrianvonziegler.bandcamp.com',
'info_dict': { 'info_dict': {
'id': 'adrianvonziegler', 'id': 'adrianvonziegler',
'title': 'Albums of adrianvonziegler', 'title': 'Discography of adrianvonziegler',
}, },
'playlist_mincount': 20, 'playlist_mincount': 22,
}, { }, {
'url': 'http://dotscale.bandcamp.com', 'url': 'http://dotscale.bandcamp.com',
'info_dict': { 'info_dict': {
'id': 'dotscale', 'id': 'dotscale',
'title': 'Albums of dotscale', 'title': 'Discography of dotscale',
}, },
'playlist_count': 1, 'playlist_count': 1,
}] }, {
'url': 'https://nightcallofficial.bandcamp.com',
'info_dict': {
'id': 'nightcallofficial',
'title': 'Discography of nightcallofficial',
},
'playlist_count': 4,
},
]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return (False return (False if BandcampAlbumIE.suitable(url)
if BandcampAlbumIE.suitable(url) or BandcampIE.suitable(url) or or BandcampIE.suitable(url)
BandcampWeeklyIE.suitable(url) or BandcampWeeklyIE.suitable(url)
else super(BandcampUserIE, cls).suitable(url)) else super(BandcampUserIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
@ -442,21 +450,38 @@ class BandcampUserIE(InfoExtractor):
webpage = self._download_webpage(url, uploader) webpage = self._download_webpage(url, uploader)
album_elements = re.findall(r'<a href="/(album/.[^"]+)">', webpage) # Bandcamp User type 1 page
try:
discography_data = json.loads(self._search_regex(
r'data-edit-callback="/music_reorder" data-initial-values="([^"]+)">',
webpage, 'raw_data').replace('&quot;', '"'))
entries = [ entries = [
self.url_result( self.url_result(
compat_urlparse.urljoin(url, album_id), compat_urlparse.urljoin(url, element['page_url']),
ie=BandcampAlbumIE.ie_key(), ie=BandcampAlbumIE.ie_key(),
video_id='%s-%s' % (uploader, album_id), video_id=element['id'],
video_title=album_id, video_title=element['title'],
) )
for album_id in album_elements for element in discography_data
]
except RegexNotFoundError:
# Bandcamp user type 2 page
discography_data = re.findall(
r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)
entries = [
self.url_result(
compat_urlparse.urljoin(url, element),
ie=BandcampAlbumIE.ie_key(),
video_title=element,
)
for element in discography_data
] ]
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': uploader, 'id': uploader,
'title': 'Albums of %s' % (uploader), 'title': 'Discography of %s' % uploader,
'entries': entries, 'entries': entries,
} }