mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-26 02:14:32 +01:00
[ENSSavoirs] Add new extractor
This commit is contained in:
parent
55c727a547
commit
4b9fdaaad3
@ -180,7 +180,7 @@ class InfoExtractor(object):
|
||||
The following fields are optional:
|
||||
|
||||
alt_title: A secondary title of the video.
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
display_id: An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
|
63
youtube_dl/extractor/enssavoirs.py
Normal file
63
youtube_dl/extractor/enssavoirs.py
Normal file
@ -0,0 +1,63 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ENSSavoirsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?savoirs\.ens\.fr/expose\.php\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://savoirs.ens.fr/expose.php?id=2516#foo',
|
||||
'md5': 'a95f51212dd2d104fb5655bdf1d03071',
|
||||
'info_dict': {
|
||||
'id': '2516',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
# TODO ::: Mon 23 Oct 2017 10:38:07 PM CEST
|
||||
# how to extract?
|
||||
#'formats': {
|
||||
# 'format_id': 'mp4',
|
||||
# 'tbr': 1612,
|
||||
# 'fps': 25,
|
||||
# 'acodec': 'aac',
|
||||
# 'vcodec': 'h264',
|
||||
# 'width': 360,
|
||||
# 'height': 640,
|
||||
# 'filesize_approx': '600M'
|
||||
# },
|
||||
'title': 'Chiffrer mieux pour (dé)chiffrer plus',
|
||||
'thumbnail': r're:^https?://(?:www\.)?savoirs\.ens\.fr/uploads/images/exposes/2516\.jpg$',
|
||||
'creator': 'Anne Canteaut',
|
||||
# TODO ::: Mon 23 Oct 2017 10:38:34 PM CEST
|
||||
# hard to extract
|
||||
#'release_date': 20160413,
|
||||
#'description': "some long long text in <div id=description>"
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# TODO does this default to HTTP and not HTTPS?
|
||||
url_base = "//savoirs.ens.fr/"
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
media_url = url_base + self._html_search_regex(r'<a[^>]+href=["\'](?P<media_url>[^"\']+)["\'][^>]*>Télécharger\sla\svidéo</a>', webpage, 'media_url')
|
||||
title = self._html_search_regex(r'<span[^>]+class=["\']titrePageExpose["\'][^>]*>(?P<title>[^<]+)</span>', webpage, 'title')
|
||||
ext = media_url.split('.')[-1]
|
||||
thumbnail = "%suploads/images/exposes/%s.jpg" % (url_base, video_id)
|
||||
creator = self._html_search_regex(r'<span[^>]+class=["\']exposeConferencierNom["\'][^>]*>(?P<creator>[^<]+)</span>', webpage, 'creator', fatal=False, default="ENS Savoirs")
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
# Fails (need to extract text of <div id=description> and remove html tags)
|
||||
#'description': self._og_search_description(webpage),
|
||||
'ext': ext,
|
||||
'format_id': ext,
|
||||
'thumbnail': thumbnail,
|
||||
'creator': creator,
|
||||
#'formats': { 'format_id': ext }
|
||||
#'release_date': release_date,
|
||||
}
|
@ -315,6 +315,7 @@ from .ellentv import (
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .engadget import EngadgetIE
|
||||
from .enssavoirs import ENSSavoirsIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
|
Loading…
Reference in New Issue
Block a user