[narando] separate [narando:player] extractor, improve code readability

2025-01-07 13:47:54 +01:00 · 2018-11-20 22:16:41 -05:00 · 2018-11-20 22:16:41 -05:00 · 92ae267c88
commit 92ae267c88
parent d33506b6d7
2 changed files with 36 additions and 14 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1482,4 +1482,4 @@ from .zingmp3 import ZingMp3IE
 from .zype import ZypeIE


-from .narando import NarandoIE
+from .narando import NarandoIE, NarandoPlayerIE
--- a/youtube_dl/extractor/narando.py
+++ b/youtube_dl/extractor/narando.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor


 class NarandoIE(InfoExtractor):
+    IE_NAME = "narando"
    _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P<id>([a-zA-Z]|-)+)'
    _TEST = {
        'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie',
@@ -15,31 +16,52 @@ class NarandoIE(InfoExtractor):
            'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
            'url': 'https://static.narando.com/sounds/10492/original.mp3',
            'description': u'omnisophie.com: Kaum  eine  Woche  vergeht,  dass  nicht  jemand  mir  gegenüber  seine  Mathematik-Unkenntnisse  tränenlos  beweint.  „In  Mathe  war  ich  niemals  gut.“  Diese  Leute  sagen  mir  das  wohl,  weil  ich  Mathematiker  bin,  und  da  gehört  so  ein  fröhliches „Understatement“  zum  Small  Talk.  So  wie  wenn  ich  selbst  bedauernd-entschuldigend  auf  meine  grauen  Haare  zeige.  Ich  kann  eben  auch  nicht  alles  bieten...  „Mathe  kann  ich  nicht“,  „Ich  habe  kein  Internet“  oder  „Ich  will  auch  bewusst  nicht  alles  können“  wird  fast  wie  Eigenlob  vorgetragen.',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
-#        webpage = self._download_webpage(url,"?")
-#        print(url)
-#        print('https://narando.com/articles/'+video_id)
+
        webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id)
-        # TODO more code goes here, for example ...
+
        title = self._html_search_regex(r'<h1 class="visible-xs h3">(.+?)</h1>', webpage, 'title')
-#        print(title)
+
        player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id')
-        player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id)
-        download_url = self._html_search_regex(r'.<div class="stream_url hide">\s*([^?]*)', player_page, 'download_url')
+        mobj = NarandoPlayerIE()
+        download_url = mobj._real_extract("https://narando.com/widget?r=" + player_id)['url']
        description = self._html_search_regex(r'<meta content="(.+?)" property="og:description" />', webpage, 'description')
        return {
            'id': video_id,
            'title': title,
            'url': download_url,
            'description': description,
-            # TODO more properties (see youtube_dl/extractor/common.py)
+        }
+
+
+class NarandoPlayerIE(InfoExtractor):
+    IE_NAME = "narando:player"
+    _VALID_URL = r'https://narando.com/widget\?r=(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw',
+        'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
+        'info_dict': {
+            'id': 'b2t4t789kxgy9g7ms4rwjvvw',
+            'ext': 'mp3',
+            'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
+            'url': 'https://static.narando.com/sounds/10492/original.mp3',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        print(video_id)
+        webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id)
+        print(webpage)
+        title = self._html_search_regex(r'<title>narando \| (.+?)</title>', webpage, 'title')
+
+        download_url = self._html_search_regex(r'.<div class="stream_url hide">\s*([^?]*)', webpage, 'download_url')
+        return {
+            'id': video_id,
+            'title': title,
+            'url': download_url,
        }