From cb9569bfae9b2f80b43e90a1c36d3e4d08d3c975 Mon Sep 17 00:00:00 2001
From: kazukazuprogram
Date: Tue, 7 Jul 2020 22:31:54 +0900
Subject: [PATCH] [piapro] Add new extractor

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/piapro.py     | 108 +++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 youtube_dl/extractor/piapro.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..8d3e598c0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -836,6 +836,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .piapro import PiaproExtractorIE
 from .picarto import (
     PicartoIE,
     PicartoVodIE,
diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py
new file mode 100644
index 000000000..b539297de
--- /dev/null
+++ b/youtube_dl/extractor/piapro.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import urlencode_postdata, ExtractorError
+
+
+class PiaproExtractorIE(InfoExtractor):
+    """Extractor for piapro.jp music works, with optional login."""
+
+    _LOGIN_URL = "https://piapro.jp/login/"
+    _LOGIN_REQUIRED = False
+    IE_NAME = "piapro"
+    IE_DESC = "piapro"
+    _NETRC_MACHINE = "piapro"
+    _VALID_URL = r'(https?:\/\/)??piapro\.jp\/t\/(?P<id>\w+)\/?'
+    # Category IDs accepted as music; any other category is rejected.
+    _MUSIC_CATEGORY_IDS = (1, 2, 21, 22, 23, 24, 25)
+    _TEST = {
+        'url': 'https://piapro.jp/t/NXYR',
+        'md5': 'a9d52f27d13bafab7ee34116a7dcfa77',
+        'info_dict': {
+            'id': 'NXYR',
+            'ext': 'mp3',
+            'title': '裏表ラバーズ',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_initialize(self):
+        """Attempt a login up front and record whether it succeeded."""
+        self._login_status = self._login()
+
+    def _login(self):
+        """Log in with the configured credentials, if any.
+
+        Returns True on success, and False when no username is configured
+        or the login failed (a warning is reported in the latter case).
+        """
+        username, password = self._get_login_info()
+        if not username:
+            return False
+        login_form_strs = {
+            '_username': username,
+            '_password': password,
+            '_remember_me': 'on',
+            'login': 'ログイン'
+        }
+        self._request_webpage(self._LOGIN_URL, None)
+        # fatal=False makes a failed request return False instead of raising,
+        # so a login failure is downgraded to the warning below.
+        urlh = self._request_webpage(
+            'https://piapro.jp/login/exe', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(login_form_strs), fatal=False)
+        # Login counts as successful only if we end up on the site root.
+        login_ok = (
+            urlh is not False
+            and compat_urlparse.urlparse(urlh.geturl()).path == '/')
+        if not login_ok:
+            self._downloader.report_warning(
+                'unable to log in: bad username or password')
+        return login_ok
+
+    def _real_extract(self, url):
+        """Return the info dict (single MP3 format) for a piapro work URL.
+
+        Raises ExtractorError (expected) when the work's category is not
+        one of the music categories.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        category_id = self._search_regex(
+            r'categoryId=(\d+)', webpage, 'category ID')
+        if int(category_id) not in self._MUSIC_CATEGORY_IDS:
+            raise ExtractorError(
+                "It's not music.", expected=True, video_id=video_id)
+        # NOTE(review): the HTML tags inside the title and uploader patterns
+        # were lost from the patch text; the anchors below are a best-effort
+        # reconstruction and must be verified against a live work page.
+        title = self._html_search_regex(
+            r'<h1[^>]*>(.+?)</h1>', webpage, 'title')
+        # The uploader is optional metadata, so a miss must not abort.
+        uploader = self._search_regex(
+            r'>([^<]+)さん<', webpage, 'uploader', fatal=False)
+        content_id = self._html_search_regex(
+            r'contentId\:\'(.+)\'', webpage, 'content ID')
+        create_date = self._html_search_regex(
+            r'createDate\:\'(.+)\'', webpage, 'create date')
+        player_url = "https://piapro.jp/html5_player_popup/?id={cid}&cdate={cdate}".format(
+            cid=content_id, cdate=create_date)
+        # Attribute the player download to this work in the log output.
+        player_webpage = self._download_webpage(
+            player_url, video_id, note='Downloading player webpage')
+        mp3_url = self._html_search_regex(
+            r'mp3: \'(?P<url>.*?)\'\}', player_webpage, 'url', group='url')
+        return {
+            'id': video_id,
+            'title': title,
+            'uploader': uploader,
+            'formats': [{
+                'format_id': "player_mp3",
+                'url': mp3_url,
+                'ext': "mp3",
+                'vcodec': "none",
+            }]
+        }