[narando] Add new extractor

2025-01-08 14:17:54 +01:00 · 2018-11-20 17:16:19 -05:00 · 2018-11-20 17:16:19 -05:00 · 2190b6aaa1
commit 2190b6aaa1
parent 2e1280ed43
2 changed files with 45 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1480,3 +1480,6 @@ from .zattoo import (
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
 from .zype import ZypeIE
 from .narando import NarandoIE
--- a/youtube_dl/extractor/narando.py
+++ b/youtube_dl/extractor/narando.py
@ -0,0 +1,42 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class NarandoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P<id>([a-zA-Z]|-)+)'
    _TEST = {
        'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie',
        'md5': 'd20f671f0395bab8f8285d1f6e8f965e',
        'info_dict': {
 #            'id': 'b2t4t789kxgy9g7ms4rwjvvw', was being used as id previously, is internal video id
            'id': 'an-ihrem-selbstlob-erkennt-man-sie',
            'ext': 'mp3',
            'title': 'An  ihrem  Selbstlob  erkennt  man  sie',
            'url': 'https://static.narando.com/sounds/10492/original.mp3',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
 #        webpage = self._download_webpage(url,"?")
 #        print(url)
 #        print('https://narando.com/articles/'+video_id)
        webpage = self._download_webpage('https://narando.com/articles/'+video_id+"?", video_id)#for some reason, this absolutely refused to work, so I'm negating the video_id and just adding it directly
        # TODO more code goes here, for example ...
        title = self._html_search_regex(r'<h1 class="visible-xs h3">(.+?)</h1>', webpage, 'title')
 #        print(title)
        player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id')
        player_page = self._download_webpage('https://narando.com/widget?r='+player_id+'&',player_id)#same as above
        download_url = self._html_search_regex(r'.<div class="stream_url hide">\s*([^?]*)', player_page, 'mp3_ddl')
        return {
            'id': video_id,
            'title': title,
            'url': download_url,
            # TODO more properties (see youtube_dl/extractor/common.py)
        }