1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 02:14:32 +01:00

[ENSSavoirs] Add new extractor

This commit is contained in:
Sylvain Saubier (ResponSyS) 2017-10-27 01:16:09 +02:00 committed by ResponSySS
parent 55c727a547
commit 4b9fdaaad3
3 changed files with 65 additions and 1 deletions

View File

@ -180,7 +180,7 @@ class InfoExtractor(object):
The following fields are optional:
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
display_id: An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"

View File

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ENSSavoirsIE(InfoExtractor):
    """Extractor for lecture pages on savoirs.ens.fr (``expose.php?id=N``).

    The page is scraped for the "Télécharger la vidéo" download link, the
    lecture title and (optionally) the speaker name.  The thumbnail URL is
    built from the numeric lecture id.
    """

    _VALID_URL = r'https?://(?:www\.)?savoirs\.ens\.fr/expose\.php\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://savoirs.ens.fr/expose.php?id=2516#foo',
        'md5': 'a95f51212dd2d104fb5655bdf1d03071',
        'info_dict': {
            'id': '2516',
            'ext': 'mp4',
            'format_id': 'mp4',
            'title': 'Chiffrer mieux pour (dé)chiffrer plus',
            'thumbnail': r're:^https?://(?:www\.)?savoirs\.ens\.fr/uploads/images/exposes/2516\.jpg$',
            'creator': 'Anne Canteaut',
            # TODO: also cover 'release_date' and 'description' (text of
            # <div id=description>) and per-format metadata (tbr, fps,
            # codecs, resolution) once the extractor provides them.
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a single savoirs.ens.fr lecture page.

        url is expected to match _VALID_URL.  Extraction fails (through the
        fatal regex searches) when the download link or the title cannot be
        found; a missing speaker name falls back to "ENS Savoirs".
        """
        video_id = self._match_id(url)

        # Build absolute URLs with the same scheme as the requested page.
        # A bare "//host/..." prefix is only a relative reference and is not
        # usable as a download or thumbnail URL.
        scheme = 'https' if url.startswith('https:') else 'http'
        url_base = '%s://savoirs.ens.fr/' % scheme

        webpage = self._download_webpage(url, video_id)

        media_path = self._html_search_regex(
            r'<a[^>]+href=["\'](?P<media_url>[^"\']+)["\'][^>]*>Télécharger\sla\svidéo</a>',
            webpage, 'media_url')
        if media_path.startswith(('http://', 'https://')):
            # Already absolute: use as is.
            media_url = media_path
        elif media_path.startswith('//'):
            # Scheme-relative link: only the scheme is missing.
            media_url = '%s:%s' % (scheme, media_path)
        else:
            # Site-relative link; avoid a doubled slash after the host.
            media_url = url_base + media_path.lstrip('/')

        title = self._html_search_regex(
            r'<span[^>]+class=["\']titrePageExpose["\'][^>]*>(?P<title>[^<]+)</span>',
            webpage, 'title')

        # Passing a default already makes this lookup non-fatal.
        creator = self._html_search_regex(
            r'<span[^>]+class=["\']exposeConferencierNom["\'][^>]*>(?P<creator>[^<]+)</span>',
            webpage, 'creator', default='ENS Savoirs')

        # Take the extension from the path only, ignoring any query string.
        ext = media_url.split('?', 1)[0].rpartition('.')[2]

        return {
            'id': video_id,
            'url': media_url,
            'title': title,
            # TODO: extract 'description' from <div id=description> (HTML
            # tags must be stripped; _og_search_description fails here) and
            # 'release_date'.
            'ext': ext,
            'format_id': ext,
            'thumbnail': '%suploads/images/exposes/%s.jpg' % (url_base, video_id),
            'creator': creator,
        }

View File

@ -315,6 +315,7 @@ from .ellentv import (
from .elpais import ElPaisIE
from .embedly import EmbedlyIE
from .engadget import EngadgetIE
from .enssavoirs import ENSSavoirsIE
from .eporner import EpornerIE
from .eroprofile import EroProfileIE
from .escapist import EscapistIE