youtube-dl/youtube_dl/extractor/bannedvideo.py

from __future__ import unicode_literals

import json
from datetime import datetime

from .common import InfoExtractor


class BannedVideoIE(InfoExtractor):
    """Extractor for ``banned.video/watch?id=...`` pages.

    All data comes from the GraphQL endpoint in ``_GRAPHQL_API``: one
    ``GetVideo`` request for the video itself, one ``GetVideoComments``
    request for the top-level comments and one ``GetCommentReplies``
    request per comment that reports replies.
    """

    _GRAPHQL_API = 'https://api.infowarsmedia.com/graphql'
    _GRAPHQL_HEADERS = {
        'Content-Type': 'application/json; charset=utf-8'
    }
    _GRAPHQL_GETVIDEO_QUERY = '''
query GetVideo($id: String!) {
    getVideo(id: $id) {
        ...DisplayVideoFields
        streamUrl
        directUrl
        unlisted
        live
        tags {
            _id
            name
            __typename
        }
        sale {
            _id
            text
            __typename
        }
        __typename
    }
}
fragment DisplayVideoFields on Video {
    _id
    title
    summary
    playCount
    largeImage
    embedUrl
    published
    videoDuration
    channel {
        _id
        title
        avatar
        __typename
    }
    createdAt
    __typename
}'''
    # Fragment shared by both comment queries below; it is appended to each
    # of them so the final query text is the same as when it was spelled out
    # twice.
    _GRAPHQL_VIDEOCOMMENT_FRAGMENT = '''
fragment VideoComment on Comment {
    _id
    content
    liked
    user {
        _id
        username
        __typename
    }
    voteCount {
        positive
        __typename
    }
    linkedUser {
        _id
        username
        __typename
    }
    createdAt
    __typename
}'''
    _GRAPHQL_GETCOMMENTS_QUERY = '''
query GetVideoComments($id: String!, $limit: Float, $offset: Float) {
    getVideoComments(id: $id, limit: $limit, offset: $offset) {
        ...VideoComment
        replyCount
        __typename
    }
}''' + _GRAPHQL_VIDEOCOMMENT_FRAGMENT
    _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
query GetCommentReplies($id: String!, $limit: Float, $offset: Float) {
    getCommentReplies(id: $id, limit: $limit, offset: $offset) {
        ...VideoComment
        replyTo {
            _id
            __typename
        }
        __typename
    }
}''' + _GRAPHQL_VIDEOCOMMENT_FRAGMENT
    # The id is 24 hex digits.  The previous class ``[0-f]`` was an ASCII
    # range that also matched punctuation and every uppercase letter.
    _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-9a-fA-F]{24})'
    _TEST = {
        'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
        'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
        'info_dict': {
            'id': '5e7a859644e02200c6ef5f11',
            'ext': 'mp4',
            'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
            'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia\.com/images/',
            'description': 'The Chinese Communist Party Official Spokesperson At the Ministry of Truth Released Their Statement Exclusively To Alex Jones and Infowars.com',
            'upload_date': '20200324',
            'timestamp': 1585084295.064,
        }
    }

    @staticmethod
    def _parse_created_at(value):
        """Parse a ``createdAt`` string into a naive ``datetime``.

        Accepts ``YYYY-MM-DDTHH:MM:SS`` with an optional fractional part and
        an optional trailing ``Z``.  Returns ``None`` when the value is
        missing or does not match either layout.
        """
        if not value:
            return None
        # presumably the API always sends a trailing 'Z' (UTC designator);
        # only strip it when it is really there.
        if value.endswith('Z'):
            value = value[:-1]
        for layout in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'):
            try:
                return datetime.strptime(value, layout)
            except ValueError:
                continue
        return None

    @staticmethod
    def _unix_timestamp(moment):
        """Convert a naive datetime, taken to be UTC, to a float UNIX timestamp.

        Returns ``None`` when ``moment`` is ``None``.  The arithmetic is done
        against a fixed epoch so the result does not depend on the local
        timezone of the machine (``datetime.timestamp()`` on a naive value
        would).
        """
        if moment is None:
            return None
        delta = moment - datetime(1970, 1, 1)
        return delta.days * 86400 + delta.seconds + delta.microseconds / 1e6

    def _call_graphql(self, video_id, operation, query, target_id, result_key):
        """POST one GraphQL operation and return ``data[result_key]``.

        ``target_id`` is sent as the ``id`` query variable.  Returns ``None``
        when the response has no ``data`` object or no ``result_key`` entry.
        """
        response = self._download_json(
            self._GRAPHQL_API,
            video_id,
            headers=self._GRAPHQL_HEADERS,
            data=json.dumps({
                'variables': {
                    'id': target_id
                },
                'operationName': operation,
                'query': query
            }).encode('utf8'),
        )
        return (response.get('data') or {}).get(result_key)

    def _comment_entry(self, comment, parent):
        """Build one entry of the ``comments`` list from an API comment."""
        author = comment.get('user') or {}
        return {
            'id': comment.get('_id'),
            'text': comment.get('content'),
            'author': author.get('username'),
            'author_id': author.get('_id'),
            'timestamp': self._unix_timestamp(
                self._parse_created_at(comment.get('createdAt'))),
            'parent': parent
        }

    def _fetch_comments(self, video_id):
        """Return top-level comments, each followed by its replies.

        Top-level comments get ``'parent': 'root'``; replies get the id of
        the comment they were fetched for.  A reply request is only issued
        for comments whose ``replyCount`` is positive.
        """
        comments = []
        top_level = self._call_graphql(
            video_id, 'GetVideoComments', self._GRAPHQL_GETCOMMENTS_QUERY,
            video_id, 'getVideoComments') or []
        for comment in top_level:
            comments.append(self._comment_entry(comment, 'root'))
            # replyCount may be absent or null; treat that as "no replies".
            if (comment.get('replyCount') or 0) > 0:
                comment_id = comment.get('_id')
                replies = self._call_graphql(
                    video_id, 'GetCommentReplies',
                    self._GRAPHQL_GETCOMMENTSREPLIES_QUERY,
                    comment_id, 'getCommentReplies') or []
                for reply in replies:
                    comments.append(self._comment_entry(reply, comment_id))
        return comments

    def _real_extract(self, url):
        """Return the info dict for the video addressed by ``url``.

        Live videos get ``formats`` built from the HLS ``streamUrl``; other
        videos get a single ``url`` taken from ``directUrl``.
        """
        video_id = self._match_id(url)
        video_info = self._call_graphql(
            video_id, 'GetVideo', self._GRAPHQL_GETVIDEO_QUERY,
            video_id, 'getVideo')
        # NOTE(review): when the API returns no video, the first ``.get``
        # below still raises AttributeError, as before; consider raising
        # ExtractorError once ``..utils`` is imported in this module.
        created_at = self._parse_created_at(video_info.get('createdAt'))
        channel = video_info.get('channel') or {}

        title = video_info.get('title')
        if title:
            # The old code always dropped the last character; presumably the
            # API appends trailing whitespace - TODO confirm.  Stripping
            # whitespace keeps titles that do not have it intact.
            title = title.strip()

        is_live = video_info.get('live')
        metadata = {
            'id': video_id,
            'title': title,
            'description': video_info.get('summary'),
            'channel': channel.get('title'),
            'channel_id': channel.get('_id'),
            'view_count': video_info.get('playCount'),
            'thumbnail': video_info.get('largeImage'),
            'duration': video_info.get('videoDuration'),
            'upload_date': created_at.strftime('%Y%m%d') if created_at else None,
            'timestamp': self._unix_timestamp(created_at),
            'tags': [
                tag.get('name')
                for tag in video_info.get('tags') or []
                if tag.get('name')],
            'is_live': is_live,
        }

        if is_live:
            # _extract_m3u8_formats already returns a list of format dicts,
            # so it is used directly instead of being nested inside another
            # list.
            formats = self._extract_m3u8_formats(
                video_info.get('streamUrl'), video_id, 'mp4',
                entry_protocol='m3u8', live=True)
            self._sort_formats(formats)
            metadata['formats'] = formats
        else:
            metadata['url'] = video_info.get('directUrl')

        metadata['comments'] = self._fetch_comments(video_id)
        return metadata
[bannedvideo] Add new extractor 2020-04-02 04:09:05 +02:00			`from __future__ import unicode_literals`

			`import json`
			`from datetime import datetime`

			`from .common import InfoExtractor`


			`class BannedVideoIE(InfoExtractor):`
			`_GRAPHQL_API = 'https://api.infowarsmedia.com/graphql'`
			`_GRAPHQL_HEADERS = {`
			`'Content-Type': 'application/json; charset=utf-8'`
			`}`
			`_GRAPHQL_GETVIDEO_QUERY = '''`
			`query GetVideo($id: String!) {`
			`getVideo(id: $id) {`
			`...DisplayVideoFields`
			`streamUrl`
			`directUrl`
			`unlisted`
			`live`
			`tags {`
			`_id`
			`name`
			`__typename`
			`}`
			`sale {`
			`_id`
			`text`
			`__typename`
			`}`
			`__typename`
			`}`
			`}`
			`fragment DisplayVideoFields on Video {`
			`_id`
			`title`
			`summary`
			`playCount`
			`largeImage`
			`embedUrl`
			`published`
			`videoDuration`
			`channel {`
			`_id`
			`title`
			`avatar`
			`__typename`
			`}`
			`createdAt`
			`__typename`
			`}'''`
			`_GRAPHQL_GETCOMMENTS_QUERY = '''`
			`query GetVideoComments($id: String!, $limit: Float, $offset: Float) {`
			`getVideoComments(id: $id, limit: $limit, offset: $offset) {`
			`...VideoComment`
			`replyCount`
			`__typename`
			`}`
			`}`
			`fragment VideoComment on Comment {`
			`_id`
			`content`
			`liked`
			`user {`
			`_id`
			`username`
			`__typename`
			`}`
			`voteCount {`
			`positive`
			`__typename`
			`}`
			`linkedUser {`
			`_id`
			`username`
			`__typename`
			`}`
			`createdAt`
			`__typename`
			`}'''`
			`_GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''`
			`query GetCommentReplies($id: String!, $limit: Float, $offset: Float) {`
			`getCommentReplies(id: $id, limit: $limit, offset: $offset) {`
			`...VideoComment`
			`replyTo {`
			`_id`
			`__typename`
			`}`
			`__typename`
			`}`
			`}`
			`fragment VideoComment on Comment {`
			`_id`
			`content`
			`liked`
			`user {`
			`_id`
			`username`
			`__typename`
			`}`
			`voteCount {`
			`positive`
			`__typename`
			`}`
			`linkedUser {`
			`_id`
			`username`
			`__typename`
			`}`
			`createdAt`
			`__typename`
			`}'''`
			`_VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-f]{24})'`
			`_TEST = {`
			`'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',`
			`'md5': '14b6e81d41beaaee2215cd75c6ed56e4',`
			`'info_dict': {`
			`'id': '5e7a859644e02200c6ef5f11',`
			`'ext': 'mp4',`
			`'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',`
			`'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/',`
			`'description': 'The Chinese Communist Party Official Spokesperson At the Ministry of Truth Released Their Statement Exclusively To Alex Jones and Infowars.com',`
			`'upload_date': '20200324',`
			`'timestamp': 1585084295.064,`
			`}`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`video_info = self._download_json(`
			`self._GRAPHQL_API,`
			`video_id,`
			`headers=self._GRAPHQL_HEADERS,`
			`data=json.dumps({`
			`'variables': {`
			`'id': video_id`
			`},`
			`'operationName': 'GetVideo',`
			`'query': self._GRAPHQL_GETVIDEO_QUERY`
			`}).encode('utf8'),`
			`).get('data').get('getVideo')`
			`video_comments = self._download_json(`
			`self._GRAPHQL_API,`
			`video_id,`
			`headers=self._GRAPHQL_HEADERS,`
			`data=json.dumps({`
			`'variables': {`
			`'id': video_id`
			`},`
			`'operationName': 'GetVideoComments',`
			`'query': self._GRAPHQL_GETCOMMENTS_QUERY`
			`}).encode('utf8'),`
			`).get('data').get('getVideoComments')`
			`upload_date = datetime.fromisoformat(video_info.get('createdAt')[:-1])`
			`metadata = {}`
			`metadata['id'] = video_id`
			`metadata['title'] = video_info.get('title')[:-1]`
			`metadata['description'] = video_info.get('summary')`
			`metadata['channel'] = video_info.get('channel').get('title')`
			`metadata['channel_id'] = video_info.get('channel').get('_id')`
			`metadata['view_count'] = video_info.get('playCount')`
			`metadata['thumbnail'] = video_info.get('largeImage')`
			`metadata['duration'] = video_info.get('videoDuration')`
			`metadata['upload_date'] = upload_date.strftime('%Y%m%d')`
			`metadata['timestamp'] = upload_date.timestamp()`
			`tags = []`

			`for tag in video_info.get('tags'):`
			`tags.append(tag.get('name'))`

			`metadata['tags'] = tags`

			`is_live = video_info.get('live')`

			`if is_live:`
			`formats = []`
			`formats.append(self._extract_m3u8_formats(video_info.get('streamUrl'), video_id, entry_protocol='hls', live=True))`
			`metadata['formats'] = formats`
			`else:`
			`metadata['url'] = video_info.get('directUrl')`

			`metadata['is_live'] = is_live`
			`comments = []`

			`for comment in video_comments:`
			`comment_date = datetime.fromisoformat(comment.get('createdAt')[:-1])`
			`comments.append({`
			`'id': comment.get('_id'),`
			`'text': comment.get('content'),`
			`'author': comment.get('user').get('username'),`
			`'author_id': comment.get('user').get('_id'),`
			`'timestamp': comment_date.timestamp(),`
			`'parent': 'root'`
			`})`
			`if comment.get('replyCount') > 0:`
			`replies = self._download_json(`
			`self._GRAPHQL_API,`
			`video_id,`
			`headers=self._GRAPHQL_HEADERS,`
			`data=json.dumps({`
			`'variables': {`
			`'id': comment.get('_id')`
			`},`
			`'operationName': 'GetCommentReplies',`
			`'query': self._GRAPHQL_GETCOMMENTSREPLIES_QUERY`
			`}).encode('utf8'),`
			`).get('data').get('getCommentReplies')`
			`for reply in replies:`
			`reply_date = datetime.fromisoformat(reply.get('createdAt')[:-1])`
			`comments.append({`
			`'id': reply.get('_id'),`
			`'text': reply.get('content'),`
			`'author': reply.get('user').get('username'),`
			`'author_id': reply.get('user').get('_id'),`
			`'timestamp': reply_date.timestamp(),`
			`'parent': comment.get('_id')`
			`})`
			`metadata["comments"] = comments`
			`return metadata`