[narando] Bring code in line with coding standards and add support for more URL formats

2025-01-08 14:17:54 +01:00 · 2019-01-20 19:00:43 -05:00 · 2019-01-20 19:00:43 -05:00 · 83809b6ee1
commit 83809b6ee1
parent fc9ea7bd13
2 changed files with 30 additions and 19 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -674,7 +674,7 @@ from .myvi import (
 from .myvidster import MyVidsterIE
 from .narando import (
    NarandoIE,
-    NarandoPlayerIE,
+    NarandoArticleIE,
 )
 from .nationalgeographic import (
    NationalGeographicVideoIE,
--- a/youtube_dl/extractor/narando.py
+++ b/youtube_dl/extractor/narando.py
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
-class NarandoPlayerIE(InfoExtractor):
+class NarandoIE(InfoExtractor):
-    IE_NAME = "narando:player"
+    IE_NAME = 'narando'
-    _VALID_URL = r'https://narando\.com/widget\?r=(?P<id>\w+)'
+    _VALID_URL = r'https?://narando\.com/widget\?.*?r=(?P<id>\w+)&?'
    _TEST = {
        'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw',
        'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
@@ -21,7 +21,7 @@ class NarandoPlayerIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<span class="clip-title">(.+?)</span>', webpage, 'title')
-        download_url = self._html_search_regex(r'.<div class="stream_url hide">\s*([^?]*)', webpage, 'download_url')
+        download_url = self._html_search_regex(r'<div class="stream_url hide">(.+)</div>', webpage, 'download_url')
        return {
            'id': video_id,
            'title': title,
@@ -30,29 +30,40 @@ class NarandoPlayerIE(InfoExtractor):
        }
-class NarandoIE(InfoExtractor):
+class NarandoArticleIE(InfoExtractor):
-    IE_NAME = "narando"
+    IE_NAME = "narando:article"
-    _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?narando\.com/(articles|r)/(?P<id>.+)'
-    _TEST = {
+    _TESTS = [
-        'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie',
+        {
-        'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
+            'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie',
-        'info_dict': {
+            'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
-            'id': 'b2t4t789kxgy9g7ms4rwjvvw',
+            'info_dict': {
-            'ext': 'mp3',
+                'id': 'b2t4t789kxgy9g7ms4rwjvvw',
-            'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
+                'ext': 'mp3',
                'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
            }
        },
        {
            'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', #alternate URL format
            'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
            'info_dict': {
                'id': 'b2t4t789kxgy9g7ms4rwjvvw',
                'ext': 'mp3',
                'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
            }
        }
-    }
+    ]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1 class="visible-xs h3">(.+?)</h1>', webpage, 'title')
-        player_id = self._html_search_regex(r'\s*https://narando.com/r/([^"]*)', webpage, 'player_id')
+        player_id = self._html_search_regex(r'https://narando.com/r/(.+?)\"', webpage, 'player_id')
-        player_url = 'https://narando.com/widget?r=' + player_id
+        url_result = 'https://narando.com/widget?r=' + player_id
        return {
            'id': player_id,
            'title': title,
-            'url': player_url,
+            'url': url_result,
            '_type': 'url',
        }