1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-29 19:47:54 +01:00

[Lumni] Add new extractor

This commit is contained in:
Surkal 2020-09-06 18:43:14 +02:00
parent d51e23d9fc
commit 68b5101b48
2 changed files with 75 additions and 0 deletions

View File

@ -580,6 +580,7 @@ from .lnkgo import LnkGoIE
from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE
from .lrt import LRTIE
from .lumni import LumniIE, LumniPlaylistIE
from .lynda import (
    LyndaIE,
    LyndaCourseIE

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .francetv import FranceTVIE
from ..utils import orderedSet
class LumniIE(InfoExtractor):
    """Extractor for single video pages under lumni.fr/video/.

    The page is only scraped for the value of its ``data-factoryid``
    attribute; the actual extraction is handed over to FranceTVIE through
    a ``francetv:<id>`` URL.
    """

    _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'https://www.lumni.fr/video/la-guerre-froide',
        'md5': '31158a5b300083ba373f4fc85dd88272',
        'info_dict': {
            'id': '302dbf40-b0df-4847-926b-99fdf4f10162',
            'ext': 'mp4',
            'title': 'La guerre froide (1er avril)',
            'timestamp': 1585754978,
            'upload_date': '20200401',
        },
    }

    def _real_extract(self, url):
        """Return a url_result that delegates the video to FranceTVIE."""
        # The slug from the URL is only used to label the page download.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The identifier FranceTVIE needs is embedded in the page markup.
        factory_id = self._search_regex(
            r'data-factoryid="([^"]+)', page, 'video id')

        return self.url_result(
            'francetv:%s' % factory_id,
            ie=FranceTVIE.ie_key(),
            video_id=factory_id)
class LumniPlaylistIE(InfoExtractor):
    """Extractor for lumni.fr collection pages (dossier/programme/serie).

    Every ``/video/<slug>`` link found in the page becomes one playlist
    entry that is delegated to LumniIE.
    """

    # The pattern is spread over several lines for readability, so it must
    # start with (?x): without the verbose flag the embedded newlines and
    # indentation are literal characters and no real URL can ever match.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?lumni\.fr/
                        (?:dossier|programme|serie)/
                        (?P<id>[0-9a-z-]+)
                    '''
    _TESTS = [{
        'url': 'https://www.lumni.fr/dossier/les-fondamentaux-vocabulaire',
        'info_dict': {
            'id': 'les-fondamentaux-vocabulaire',
            'title': 'Les Fondamentaux : Vocabulaire',
        },
        'playlist_mincount': 39
    }, {
        'url': 'https://www.lumni.fr/programme/the-rich-morning-show',
        'only_matching': True
    }, {
        'url': 'https://www.lumni.fr/serie/la-maison-lumni-college',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build a playlist result from the video links of a collection page.

        The playlist id is the slug matched from ``url``; the title comes
        from ``_og_search_title`` applied to the downloaded page.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # A page may link the same video several times; orderedSet (from
        # ..utils) is presumably there to drop repeats while keeping page
        # order -- confirm against its definition.
        video_ids = orderedSet(re.findall(
            r'<a[^>]+\bhref=["\']/video/([0-9a-z-]+)', webpage))

        entries = [
            self.url_result(
                'https://lumni.fr/video/%s' % video_id,
                ie=LumniIE.ie_key(), video_id=video_id)
            for video_id in video_ids]

        playlist_title = self._og_search_title(webpage)
        return self.playlist_result(entries, playlist_id, playlist_title)