From 89b2983b3f4bd3160b1e8cccd70ee36c30ffe729 Mon Sep 17 00:00:00 2001
From: quinlander
Date: Sun, 14 Apr 2019 20:04:08 -0400
Subject: [PATCH] Added changba import and implemented simple changba
 extractor with single test

---
 youtube_dl/extractor/changba.py    | 58 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 59 insertions(+)
 create mode 100644 youtube_dl/extractor/changba.py

diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py
new file mode 100644
index 000000000..1fbf7968f
--- /dev/null
+++ b/youtube_dl/extractor/changba.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ChangbaIE(InfoExtractor):
+    """Extract a single song or video ("work") from a changba.com share page."""
+
+    # The named group must be called "id": InfoExtractor._match_id() reads it.
+    _VALID_URL = r'https?://(?:www\.)?changba\.com/s/(?P<id>[0-9A-Za-z_-]+)'
+    _TEST = {
+        'url': 'https://changba.com/s/0GHVw6vyXv9N2FhaFi2WJg',
+        # TODO: add 'md5' (first 10241 bytes of the media file, see --test).
+        'info_dict': {
+            'id': '1152860688',
+            'ext': 'mp4',
+            # TODO: pin the exact title once checked against the live page.
+            'title': 're:.+',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the work at ``url``.
+
+        The id and title are scraped from the page HTML. ``isvideo=0``
+        marks an audio-only work (mp3); any other digit is treated as
+        video (mp4). When no direct media link is found in the page, the
+        URL is built from the numeric work id instead.
+        """
+        share_id = self._match_id(url)
+        webpage = self._download_webpage(url, share_id)
+
+        work_id = self._search_regex(r'workid=([0-9]+)', webpage, 'id')
+        is_video = self._search_regex(
+            r'&isvideo=([0-9])', webpage, 'isvideo') != '0'
+        # Matches the first element carrying class="title".
+        # NOTE(review): confirm the element against the live markup.
+        title = self._search_regex(
+            r'<[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip()
+
+        ext = 'mp4' if is_video else 'mp3'
+        # default=None makes a missing link non-fatal so the fallback below
+        # can take over, without a bare "except" hiding unrelated errors.
+        media_url = self._search_regex(
+            r'([a-z]+://[0-9a-z]+\.changba\.com/[a-z]+/[a-z]+/[0-9]+/[0-9]+\.%s)' % ext,
+            webpage, 'url', default=None)
+        if not media_url:
+            media_url = 'http://lzscuw.changba.com/%s.%s' % (work_id, ext)
+
+        return {
+            'id': work_id,
+            'url': media_url,
+            'ext': ext,
+            'title': title,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cc19af5c4..a68849319 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -185,6 +185,7 @@ from .ceskatelevize import (
     CeskaTelevizeIE,
     CeskaTelevizePoradyIE,
 )
+from .changba import ChangbaIE
 from .channel9 import Channel9IE
 from .charlierose import CharlieRoseIE
 from .chaturbate import ChaturbateIE