From 68b5101b487159e1e59ced60ab389654a592e95d Mon Sep 17 00:00:00 2001
From: Surkal <>
Date: Sun, 6 Sep 2020 18:43:14 +0200
Subject: [PATCH 1/3] [Lumni] Add new extractor

---
Note (review): this series was recovered from a copy whose line breaks and
angle-bracketed text had been stripped. The `(?P<id>...)` group names and the
`<a\b[^>]+` prefix of the playlist link regex are reconstructions (TODO confirm
against the upstream commits), as is the indentation of continuation lines.
The author e-mail address could not be recovered.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/lumni.py      | 74 ++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 youtube_dl/extractor/lumni.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9564465a0..420f1be40 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -580,6 +580,7 @@ from .lnkgo import LnkGoIE
 from .localnews8 import LocalNews8IE
 from .lovehomeporn import LoveHomePornIE
 from .lrt import LRTIE
+from .lumni import LumniIE, LumniPlaylistIE
 from .lynda import (
     LyndaIE,
     LyndaCourseIE
diff --git a/youtube_dl/extractor/lumni.py b/youtube_dl/extractor/lumni.py
new file mode 100644
index 000000000..748306e59
--- /dev/null
+++ b/youtube_dl/extractor/lumni.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .francetv import FranceTVIE
+from ..utils import orderedSet
+
+
+class LumniIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[0-9a-z-]+)'
+    _TEST = {
+        'url': 'https://www.lumni.fr/video/la-guerre-froide',
+        'md5': '31158a5b300083ba373f4fc85dd88272',
+        'info_dict': {
+            'id': '302dbf40-b0df-4847-926b-99fdf4f10162',
+            'ext': 'mp4',
+            'timestamp': 1585754978,
+            'upload_date': '20200401',
+            'title': 'La guerre froide (1er avril)',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'data-factoryid="([^"]+)',
+            webpage, 'video id')
+        full_id = 'francetv:%s' % video_id
+
+        return self.url_result(full_id,
+                               ie=FranceTVIE.ie_key(),
+                               video_id=video_id)
+
+
+class LumniPlaylistIE(InfoExtractor):
+    _VALID_URL = r'''https?://
+                    (?:www\.)?lumni\.fr/
+                    (?:dossier|programme|serie)/
+                    (?P<id>[0-9a-z-]+)
+                 '''
+    _TESTS = [{
+        'url': 'https://www.lumni.fr/dossier/les-fondamentaux-vocabulaire',
+        'info_dict': {
+            'id': 'les-fondamentaux-vocabulaire',
+            'title': 'Les Fondamentaux : Vocabulaire',
+        },
+        'playlist_mincount': 39
+    }, {
+        'url': 'https://www.lumni.fr/programme/the-rich-morning-show',
+        'only_matching': True
+    }, {
+        'url': 'https://www.lumni.fr/serie/la-maison-lumni-college',
+        'only_matching': True
+    }
+    ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [self.url_result(
+            'https://lumni.fr/video/%s' % video_id,
+            ie=LumniIE.ie_key(), video_id=video_id)
+            for video_id in orderedSet(re.findall(
+                r'<a\b[^>]+\bhref=["\']/video/([0-9a-z-]+)', webpage))]
+
+        playlist_title = self._og_search_title(webpage)
+
+        return self.playlist_result(entries, playlist_id, playlist_title)

From 9d037f4f1dbf9cdb0abe2617b3b806203c875fea Mon Sep 17 00:00:00 2001
From: Surkal <>
Date: Sun, 6 Sep 2020 19:00:26 +0200
Subject: [PATCH 2/3] fix regex

---
 youtube_dl/extractor/lumni.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/lumni.py b/youtube_dl/extractor/lumni.py
index 748306e59..68f6f0bd1 100644
--- a/youtube_dl/extractor/lumni.py
+++ b/youtube_dl/extractor/lumni.py
@@ -37,11 +37,7 @@ class LumniIE(InfoExtractor):
 
 
 class LumniPlaylistIE(InfoExtractor):
-    _VALID_URL = r'''https?://
-                    (?:www\.)?lumni\.fr/
-                    (?:dossier|programme|serie)/
-                    (?P<id>[0-9a-z-]+)
-                 '''
+    _VALID_URL = r'https?://(?:www\.)?lumni\.fr/(?:dossier|programme|serie)/(?P<id>[0-9a-z-]+)'
     _TESTS = [{
         'url': 'https://www.lumni.fr/dossier/les-fondamentaux-vocabulaire',
         'info_dict': {

From e062ec875d402a13117754dd7f7a4542353082eb Mon Sep 17 00:00:00 2001
From: Surkal <>
Date: Mon, 7 Sep 2020 12:24:28 +0200
Subject: [PATCH 3/3] split regex

---
 youtube_dl/extractor/lumni.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/lumni.py b/youtube_dl/extractor/lumni.py
index 68f6f0bd1..05146ab72 100644
--- a/youtube_dl/extractor/lumni.py
+++ b/youtube_dl/extractor/lumni.py
@@ -37,7 +37,12 @@ class LumniIE(InfoExtractor):
 
 
 class LumniPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?lumni\.fr/(?:dossier|programme|serie)/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                    (?:www\.)?lumni\.fr/
+                    (?:dossier|programme|serie)/
+                    (?P<id>[0-9a-z-]+)
+                 '''
     _TESTS = [{
         'url': 'https://www.lumni.fr/dossier/les-fondamentaux-vocabulaire',
         'info_dict': {