1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 10:24:33 +01:00

[blogger] Add support for videos and embeds

There is no discernible title field in any of the JSON or URLs,
and the title is mandatory, so 'Blogger' is used as a substitute.
This commit is contained in:
Paul Wise 2019-01-08 17:28:37 +08:00
parent 07af16b92e
commit 36251bece0
3 changed files with 86 additions and 0 deletions

View File

@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse as urlparse,
compat_parse_qs as qsparse,
float_or_none,
str_or_none,
)
class BloggerIE(InfoExtractor):
    """Extractor for videos served from ``blogger.com/video.g?token=...``.

    Handles both direct player URLs (see ``_VALID_URL``) and ``<iframe>``
    embeds found in arbitrary web pages (see ``_VALID_EMBED`` and
    ``_extract_urls``).
    """

    IE_NAME = 'blogger.com'
    _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
    # Matches the ``src`` of an embedding iframe; the scheme is optional so
    # protocol-relative (``//www.blogger.com/...``) embeds are matched too.
    _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']'''
    _TESTS = [{
        'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
        'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
        'info_dict': {
            'id': 'BLOGGER-video-3c740e3a49197e16-796',
            'ext': 'mp4',
            'title': 'Blogger',
            'thumbnail': r're:^https?://.*',
        }
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first embedded Blogger player URL in *webpage*, or None."""
        urls = BloggerIE._extract_urls(webpage)
        return urls[0] if urls else None

    @staticmethod
    def _extract_urls(webpage):
        """Return every embedded Blogger player URL found in *webpage*.

        The result is the list of ``src`` values captured by ``_VALID_EMBED``
        in document order; it is empty when the page has no such iframe.
        """
        return re.findall(BloggerIE._VALID_EMBED, webpage)

    @staticmethod
    def _first_query_value(url, name):
        """Return the first value of query parameter *name* in *url*.

        Returns None when the parameter is absent, instead of failing on
        the missing key.
        """
        values = qsparse(urlparse(url).query).get(name)
        return values[0] if values else None

    def _real_extract(self, url):
        """Download the player page at *url* and build the info dict.

        The stream list is read from the ``VIDEO_CONFIG`` JavaScript
        variable of the player page. Each stream's container extension and
        the overall duration are taken from the ``mime`` and ``dur`` query
        parameters of the stream's ``play_url``; both are optional and are
        left as None when the parameter is missing.
        """
        token_id = self._match_id(url)
        webpage = self._download_webpage(url, token_id)
        data_json = self._search_regex(
            r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data block')
        # Resolve backslash escapes before handing the text to the JSON
        # parser. NOTE(review): presumably the page uses JavaScript escapes
        # that are not valid JSON; confirm against a live page.
        data_json = data_json.encode('utf-8').decode('unicode_escape')
        data = json.loads(data_json)

        streams = data['streams']

        formats = []
        for stream in streams:
            play_url = stream['play_url']
            # ``mime`` looks like ``video/mp4``; strip the type prefix to get
            # the extension. A missing parameter must not abort extraction.
            mime = self._first_query_value(play_url, 'mime')
            formats.append({
                'ext': mime.replace('video/', '') if mime else None,
                'url': play_url,
                'format_id': str_or_none(stream.get('format_id')),
            })

        # The duration is read from the first stream only; guard against an
        # empty stream list and a missing ``dur`` parameter.
        duration = None
        if streams:
            duration = float_or_none(
                self._first_query_value(streams[0]['play_url'], 'dur'))

        return {
            # Fall back to the URL token when the config carries no id.
            'id': data.get('iframe_id', token_id),
            # No title is available in the page data or URLs, and a title is
            # mandatory, so a fixed placeholder is used.
            'title': 'Blogger',
            'formats': formats,
            'thumbnail': data.get('thumbnail'),
            'duration': duration,
        }

View File

@ -118,6 +118,7 @@ from .bleacherreport import (
BleacherReportCMSIE, BleacherReportCMSIE,
) )
from .blinkx import BlinkxIE from .blinkx import BlinkxIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bostonglobe import BostonGlobeIE from .bostonglobe import BostonGlobeIE

View File

@ -119,6 +119,7 @@ from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .blogger import BloggerIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2151,6 +2152,17 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# blogger embed
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
'info_dict': {
'id': 'BLOGGER-video-3c740e3a49197e16-796',
'ext': 'mp4',
'title': 'Blogger',
'thumbnail': r're:^https?://.*',
},
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -2943,6 +2955,11 @@ class GenericIE(InfoExtractor):
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) return self.url_result(onionstudios_url)
# Look for Blogger embeds
blogger_urls = BloggerIE._extract_urls(webpage)
if blogger_urls:
return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
# Look for ViewLift embeds # Look for ViewLift embeds
viewlift_url = ViewLiftEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if viewlift_url: if viewlift_url: