from __future__ import unicode_literals import json from datetime import datetime from .common import InfoExtractor class BannedVideoIE(InfoExtractor): _GRAPHQL_API = 'https://api.infowarsmedia.com/graphql' _GRAPHQL_HEADERS = { 'Content-Type': 'application/json; charset=utf-8' } _GRAPHQL_GETVIDEO_QUERY = ''' query GetVideo($id: String!) { getVideo(id: $id) { ...DisplayVideoFields streamUrl directUrl unlisted live tags { _id name __typename } sale { _id text __typename } __typename } } fragment DisplayVideoFields on Video { _id title summary playCount largeImage embedUrl published videoDuration channel { _id title avatar __typename } createdAt __typename }''' _GRAPHQL_GETCOMMENTS_QUERY = ''' query GetVideoComments($id: String!, $limit: Float, $offset: Float) { getVideoComments(id: $id, limit: $limit, offset: $offset) { ...VideoComment replyCount __typename } } fragment VideoComment on Comment { _id content liked user { _id username __typename } voteCount { positive __typename } linkedUser { _id username __typename } createdAt __typename }''' _GRAPHQL_GETCOMMENTSREPLIES_QUERY = ''' query GetCommentReplies($id: String!, $limit: Float, $offset: Float) { getCommentReplies(id: $id, limit: $limit, offset: $offset) { ...VideoComment replyTo { _id __typename } __typename } } fragment VideoComment on Comment { _id content liked user { _id username __typename } voteCount { positive __typename } linkedUser { _id username __typename } createdAt __typename }''' _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P[0-f]{24})' _TEST = { 'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11', 'md5': '14b6e81d41beaaee2215cd75c6ed56e4', 'info_dict': { 'id': '5e7a859644e02200c6ef5f11', 'ext': 'mp4', 'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement', 'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/', 'description': 'The Chinese Communist Party Official Spokesperson At the Ministry of Truth Released Their Statement Exclusively To Alex Jones and Infowars.com', 'upload_date': '20200324', 'timestamp': 1585084295.064, } } def _real_extract(self, url): video_id = self._match_id(url) video_info = self._download_json( self._GRAPHQL_API, video_id, headers=self._GRAPHQL_HEADERS, data=json.dumps({ 'variables': { 'id': video_id }, 'operationName': 'GetVideo', 'query': self._GRAPHQL_GETVIDEO_QUERY }).encode('utf8'), ).get('data').get('getVideo') video_comments = self._download_json( self._GRAPHQL_API, video_id, headers=self._GRAPHQL_HEADERS, data=json.dumps({ 'variables': { 'id': video_id }, 'operationName': 'GetVideoComments', 'query': self._GRAPHQL_GETCOMMENTS_QUERY }).encode('utf8'), ).get('data').get('getVideoComments') upload_date = datetime.fromisoformat(video_info.get('createdAt')[:-1]) metadata = {} metadata['id'] = video_id metadata['title'] = video_info.get('title')[:-1] metadata['description'] = video_info.get('summary') metadata['channel'] = video_info.get('channel').get('title') metadata['channel_id'] = video_info.get('channel').get('_id') metadata['view_count'] = video_info.get('playCount') metadata['thumbnail'] = video_info.get('largeImage') metadata['duration'] = video_info.get('videoDuration') metadata['upload_date'] = upload_date.strftime('%Y%m%d') metadata['timestamp'] = upload_date.timestamp() tags = [] for tag in video_info.get('tags'): tags.append(tag.get('name')) metadata['tags'] = tags is_live = video_info.get('live') if is_live: formats = [] formats.append(self._extract_m3u8_formats(video_info.get('streamUrl'), video_id, entry_protocol='hls', live=True)) metadata['formats'] = formats else: metadata['url'] = video_info.get('directUrl') metadata['is_live'] = is_live comments = [] for comment in video_comments: comment_date = datetime.fromisoformat(comment.get('createdAt')[:-1]) comments.append({ 'id': comment.get('_id'), 'text': comment.get('content'), 'author': comment.get('user').get('username'), 'author_id': comment.get('user').get('_id'), 'timestamp': comment_date.timestamp(), 'parent': 'root' }) if comment.get('replyCount') > 0: replies = self._download_json( self._GRAPHQL_API, video_id, headers=self._GRAPHQL_HEADERS, data=json.dumps({ 'variables': { 'id': comment.get('_id') }, 'operationName': 'GetCommentReplies', 'query': self._GRAPHQL_GETCOMMENTSREPLIES_QUERY }).encode('utf8'), ).get('data').get('getCommentReplies') for reply in replies: reply_date = datetime.fromisoformat(reply.get('createdAt')[:-1]) comments.append({ 'id': reply.get('_id'), 'text': reply.get('content'), 'author': reply.get('user').get('username'), 'author_id': reply.get('user').get('_id'), 'timestamp': reply_date.timestamp(), 'parent': comment.get('_id') }) metadata["comments"] = comments return metadata