mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-12-24 15:27:55 +01:00
[bannedvideo] Add new extractor
This commit is contained in:
parent
049c0486bb
commit
ca8d548ee3
220
youtube_dl/extractor/bannedvideo.py
Normal file
220
youtube_dl/extractor/bannedvideo.py
Normal file
@ -0,0 +1,220 @@
|
||||
from __future__ import unicode_literals

import json
from datetime import datetime

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    try_get,
)
|
||||
|
||||
|
||||
class BannedVideoIE(InfoExtractor):
    """Extractor for banned.video watch pages.

    Metadata, comments and comment replies are all fetched from the
    GraphQL endpoint in ``_GRAPHQL_API``; the watch page itself is never
    downloaded.
    """

    _GRAPHQL_API = 'https://api.infowarsmedia.com/graphql'
    _GRAPHQL_HEADERS = {
        'Content-Type': 'application/json; charset=utf-8'
    }
    # The query documents below are sent verbatim as the "query" member of
    # the POST body, so their text is kept flush-left exactly as authored.
    _GRAPHQL_GETVIDEO_QUERY = '''
query GetVideo($id: String!) {
getVideo(id: $id) {
...DisplayVideoFields
streamUrl
directUrl
unlisted
live
tags {
_id
name
__typename
}
sale {
_id
text
__typename
}
__typename
}
}
fragment DisplayVideoFields on Video {
_id
title
summary
playCount
largeImage
embedUrl
published
videoDuration
channel {
_id
title
avatar
__typename
}
createdAt
__typename
}'''
    # Fragment shared by the comments query and the replies query.
    _GRAPHQL_VIDEOCOMMENT_FRAGMENT = '''fragment VideoComment on Comment {
_id
content
liked
user {
_id
username
__typename
}
voteCount {
positive
__typename
}
linkedUser {
_id
username
__typename
}
createdAt
__typename
}'''
    _GRAPHQL_GETCOMMENTS_QUERY = '''
query GetVideoComments($id: String!, $limit: Float, $offset: Float) {
getVideoComments(id: $id, limit: $limit, offset: $offset) {
...VideoComment
replyCount
__typename
}
}
''' + _GRAPHQL_VIDEOCOMMENT_FRAGMENT
    _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
query GetCommentReplies($id: String!, $limit: Float, $offset: Float) {
getCommentReplies(id: $id, limit: $limit, offset: $offset) {
...VideoComment
replyTo {
_id
__typename
}
__typename
}
}
''' + _GRAPHQL_VIDEOCOMMENT_FRAGMENT
    # Ids are matched as 24 lowercase hex digits.  The explicit 0-9a-f class
    # matters: the range "0-f" would also accept ':', '@', 'A'-'Z', '_', ...
    _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-9a-f]{24})'
    _TEST = {
        'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
        'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
        'info_dict': {
            'id': '5e7a859644e02200c6ef5f11',
            'ext': 'mp4',
            'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
            'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia\.com/images/',
            'description': 'The Chinese Communist Party Official Spokesperson At the Ministry of Truth Released Their Statement Exclusively To Alex Jones and Infowars.com',
            'upload_date': '20200324',
            # Whole seconds: parse_iso8601() discards the fractional part.
            # NOTE(review): assumes createdAt is 2020-03-24T21:11:35.064Z -
            # confirm against the live API.
            'timestamp': 1585084295,
        }
    }

    def _call_graphql(self, video_id, operation, result_key, query,
                      object_id, note, fatal=True):
        """POST one GraphQL operation and return ``data[result_key]``.

        ``video_id`` is only used for log output; ``object_id`` is the value
        sent as the ``$id`` variable (a video id or a comment id).  Returns
        None when the response does not contain the expected member, or when
        the request failed and ``fatal`` is false.
        """
        response = self._download_json(
            self._GRAPHQL_API, video_id, note=note, fatal=fatal,
            headers=self._GRAPHQL_HEADERS,
            data=json.dumps({
                'variables': {
                    'id': object_id
                },
                'operationName': operation,
                'query': query
            }).encode('utf8'))
        return try_get(response, lambda x: x['data'][result_key])

    @staticmethod
    def _build_comment(comment, parent):
        """Map one API comment object to a youtube-dl comment dict."""
        user = comment.get('user') or {}
        return {
            'id': comment.get('_id'),
            'text': comment.get('content'),
            'author': user.get('username'),
            'author_id': user.get('_id'),
            'timestamp': parse_iso8601(comment.get('createdAt')),
            'parent': parent,
        }

    def _extract_comments(self, video_id):
        """Return top-level comments, each followed by its replies.

        Comments are best-effort: a failed or malformed response yields
        fewer (or no) comments instead of aborting the extraction.  No
        limit/offset is sent, so only what the API returns by default is
        collected.
        """
        comments = []
        top_level = self._call_graphql(
            video_id, 'GetVideoComments', 'getVideoComments',
            self._GRAPHQL_GETCOMMENTS_QUERY, video_id,
            'Downloading comments', fatal=False)
        for comment in top_level or []:
            if not isinstance(comment, dict):
                continue
            comments.append(self._build_comment(comment, 'root'))
            # replyCount may be missing/null; treat that as "no replies".
            if (int_or_none(comment.get('replyCount')) or 0) <= 0:
                continue
            comment_id = comment.get('_id')
            replies = self._call_graphql(
                video_id, 'GetCommentReplies', 'getCommentReplies',
                self._GRAPHQL_GETCOMMENTSREPLIES_QUERY, comment_id,
                'Downloading replies to comment %s' % comment_id,
                fatal=False)
            comments.extend(
                self._build_comment(reply, comment_id)
                for reply in replies or [] if isinstance(reply, dict))
        return comments

    def _real_extract(self, url):
        """Build the info dict for a banned.video watch URL.

        Raises ExtractorError when the API returns no video object or the
        video has no title.
        """
        video_id = self._match_id(url)

        video_info = self._call_graphql(
            video_id, 'GetVideo', 'getVideo', self._GRAPHQL_GETVIDEO_QUERY,
            video_id, 'Downloading video metadata')
        if not isinstance(video_info, dict):
            raise ExtractorError(
                'Unable to retrieve video metadata', video_id=video_id)

        # NOTE(review): the previous code dropped the last character of the
        # title unconditionally, presumably because the API pads titles with
        # trailing whitespace - strip() handles that without truncating
        # titles that are not padded.  Confirm against the API.
        title = (video_info.get('title') or '').strip()
        if not title:
            raise ExtractorError('Unable to extract title', video_id=video_id)

        channel = video_info.get('channel') or {}

        # parse_iso8601() honours the trailing "Z", so the result is a real
        # UTC epoch value regardless of the local timezone, and it works on
        # interpreters that lack datetime.fromisoformat().
        timestamp = parse_iso8601(video_info.get('createdAt'))
        upload_date = None
        if timestamp is not None:
            upload_date = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')

        is_live = video_info.get('live')

        metadata = {
            'id': video_id,
            'title': title,
            'description': video_info.get('summary'),
            'channel': channel.get('title'),
            'channel_id': channel.get('_id'),
            'view_count': int_or_none(video_info.get('playCount')),
            'thumbnail': video_info.get('largeImage'),
            'duration': video_info.get('videoDuration'),
            'upload_date': upload_date,
            'timestamp': timestamp,
            'tags': [
                tag.get('name')
                for tag in video_info.get('tags') or []
                if isinstance(tag, dict) and tag.get('name')],
            'is_live': is_live,
        }

        if is_live:
            # _extract_m3u8_formats() already returns a list of formats; it
            # is used directly (not nested in another list) with the default
            # 'm3u8' entry protocol.
            formats = self._extract_m3u8_formats(
                video_info.get('streamUrl'), video_id, 'mp4', live=True)
            self._sort_formats(formats)
            metadata['formats'] = formats
        else:
            metadata['url'] = video_info.get('directUrl')

        metadata['comments'] = self._extract_comments(video_id)
        return metadata
|
Loading…
Reference in New Issue
Block a user