1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 02:14:32 +01:00

mastodon extractor, closes #18226 #25886

This commit is contained in:
selfisekai 2020-07-23 10:34:09 +02:00
parent a115e07594
commit 0b3b53128a
2 changed files with 112 additions and 0 deletions

View File

@ -601,6 +601,7 @@ from .markiza import (
MarkizaPageIE, MarkizaPageIE,
) )
from .massengeschmacktv import MassengeschmackTVIE from .massengeschmacktv import MassengeschmackTVIE
from .mastodon import MastodonIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mdr import MDRIE from .mdr import MDRIE
from .mediaset import MediasetIE from .mediaset import MediasetIE

View File

@ -0,0 +1,111 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import (
InfoExtractor,
ExtractorError,
)
from ..utils import (
clean_html,
str_or_none,
)
import re
# this infoextractor is for services implementing the Mastodon API, not just Mastodon
# supported services (possibly more already work or could):
# - Mastodon - https://github.com/tootsuite/mastodon
# - Glitch (a fork of Mastodon) - https://github.com/glitch-soc/mastodon
# - Pleroma - https://git.pleroma.social/pleroma/pleroma
# - Gab Social (a fork of Mastodon) - https://code.gab.com/gab/social/gab-social/
class MastodonIE(InfoExtractor):
IE_NAME = 'mastodon'
_VALID_URL = r'https?://(?P<host>[^/\s]+)(?<!facebook\.com)/(?:(?:@[a-zA-Z0-9_]+|[a-zA-Z0-9_]+/posts|users/[a-zA-Z0-9_]+/statuses)|notice|objects)/(?P<id>[0-9a-zA-Z-]+)'
_TESTS = [{
# mastodon, video description
"url": "https://mastodon.technology/@BadAtNames/104254332187004304",
"info_dict": {
"id": "104254332187004304",
"title": "BadAtNames - Mfw trump supporters complain about twitter",
"ext": "mp4",
"description": "md5:53f4428d4dc7e25a8255cf2a08488f2e",
},
}, {
# pleroma, /objects/ redirect, empty content
"url": "https://fedi.valkyrie.world/objects/386d2d68-090f-492e-81bd-8d32a3a65627",
"info_dict": {
"id": "9xLMO1BcEEbaM54LBI",
"title": "VD-15 - ",
"ext": "mp4",
"description": "video0_4_1.mp4",
},
}, {
# pleroma, multiple videos in single post (can't define tests for _type multi_video)
"url": "https://donotsta.re/notice/9xN1v6yM7WhzE7aIIC",
"only_matching": True,
}, {
# gab social
"url": "https://gab.com/ACT1TV/posts/104450493441154721",
"info_dict": {
"id": "104450493441154721",
"title": "Bill Blaze - He shoots, he scores and the crowd went wild.... #Animal #Sports",
"ext": "mp4",
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host, id = mobj.group('host', 'id')
if '/objects/' in url:
page = self._download_webpage(url, '%s@%s' % (id, host), expected_status=302)
real_url = self._og_search_property('url', page, default=None)
if real_url:
return {
"_type": "url",
"ie_key": "Mastodon",
"url": real_url,
}
metadata = self._download_json('https://%s/api/v1/statuses/%s' % (host, id), '%s@%s' % (id, host))
if not metadata['media_attachments']:
raise ExtractorError('No attached medias')
medias = []
for media in metadata['media_attachments']:
if media['type'] == 'video':
medias.append(media)
title = '%s - %s' % (str_or_none(metadata['account']['display_name'] or metadata['account']['acct']), clean_html(str_or_none(metadata['content'])))
if len(medias) == 0:
raise ExtractorError('No audio/video attachments')
elif len(medias) == 1:
media = medias[0]
return {
"id": id,
"title": title,
"description": str_or_none(media['description']),
"url": str_or_none(media['url']),
"thumbnail": str_or_none(media['preview_url']),
}
else:
entries = []
for media in medias:
entries.append({
"id": id,
"title": str_or_none(media['description']) or title,
"url": str_or_none(media['url']),
"thumbnail": str_or_none(media['preview_url']),
})
return {
"_type": "multi_video",
"id": id,
"title": title,
"entries": entries,
}