[piapro] Add new extractor

2024-11-22 16:44:32 +01:00 · 2020-07-07 22:31:54 +09:00 · 2020-07-07 22:31:54 +09:00 · cb9569bfae
commit cb9569bfae
parent 07af16b92e
2 changed files with 90 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -836,6 +836,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .piapro import PiaproExtractorIE
 from .picarto import (
    PicartoIE,
    PicartoVodIE,
--- a/youtube_dl/extractor/piapro.py
+++ b/youtube_dl/extractor/piapro.py
@ -0,0 +1,89 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import urlencode_postdata, ExtractorError
+
+
+class PiaproExtractorIE(InfoExtractor):
+    _LOGIN_URL = "https://piapro.jp/login/"
+    _LOGIN_REQUIRED = False
+    IE_NAME = "piapro"
+    IE_DESC = "piapro"
+    _NETRC_MACHINE = "piapro"
+    _VALID_URL = r'(https?:\/\/)??piapro\.jp\/t\/(?P<id>\w+)\/?'
+    _TEST = {
+        'url': 'https://piapro.jp/t/NXYR',
+        'md5': 'a9d52f27d13bafab7ee34116a7dcfa77',
+        'info_dict': {
+            'id': 'NXYR',
+            'ext': 'mp3',
+            'title': '裏表ラバーズ',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_initialize(self):
+        self._login_status = self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if not username:
+            return False
+        login_ok = True
+        login_form_strs = {
+            '_username': username,
+            '_password': password,
+            '_remember_me': 'on',
+            'login': 'ログイン'
+        }
+        self._request_webpage(self._LOGIN_URL, None)
+        urlh = self._request_webpage(
+            'https://piapro.jp/login/exe', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(login_form_strs))
+        if urlh is False:
+            login_ok = False
+        else:
+            parts = compat_urlparse.urlparse(urlh.geturl())
+            if parts.path != '/':
+                login_ok = False
+        if not login_ok:
+            self._downloader.report_warning(
+                'unable to log in: bad username or password')
+        return login_ok
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        catId = self._search_regex(r'categoryId=(.+)">', webpage, None)
+        is_music = int(catId) in [1, 2, 21, 22, 23, 24, 25]
+        if not is_music:
+            raise ExtractorError(
+                "It's not music.", expected=True, video_id=video_id)
+        title = self._html_search_regex(
+            r'<h1 class="cd_works-title">(.+?)</h1>', webpage, 'title')
+        uploader = self._search_regex(
+            r'<a class="cd_user-name" href="/.*">([^<]+)さん<', webpage, None)
+        contentId = self._html_search_regex(
+            r'contentId\:\'(.+)\'', webpage, None)
+        createDate = self._html_search_regex(
+            r'createDate\:\'(.+)\'', webpage, None)
+        player_url = "https://piapro.jp/html5_player_popup/?id={cid}&cdate={cdate}".format(
+            cid=contentId, cdate=createDate)
+        player_webpage = self._download_webpage(player_url, "Player")
+        mp3_url = self._html_search_regex(
+            r'mp3: \'(?P<url>.*?)\'\}', player_webpage, 'url', group='url')
+        info = {
+            'id': video_id,
+            'title': title,
+            'uploader': uploader,
+            'formats': [{
+                'format_id': "player_mp3",
+                'url': mp3_url,
+                'ext': "mp3",
+                'vcodec': "none",
+            }]
+        }
+        return info