From b0434adccc77aa6dfe330fd8702f4dc34ecd0642 Mon Sep 17 00:00:00 2001
From: Sam
Date: Sat, 14 Apr 2018 03:10:00 -0400
Subject: [PATCH] scientology.py Add new extractor

---
 youtube_dl/extractor/extractors.py  |  1 +
 youtube_dl/extractor/scientology.py | 57 +++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 youtube_dl/extractor/scientology.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c9f60114d..6bb3c5a7d 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -941,6 +941,7 @@ from .safari import (
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
+from .scientology import ScientologyIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .scrippsnetworks import ScrippsNetworksWatchIE
diff --git a/youtube_dl/extractor/scientology.py b/youtube_dl/extractor/scientology.py
new file mode 100644
index 000000000..adbe43c15
--- /dev/null
+++ b/youtube_dl/extractor/scientology.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ScientologyIE(InfoExtractor):
+    """Extractor for video pages under scientology.tv/series/."""
+
+    _VALID_URL = r'https?://(?:www\.)?scientology\.tv/series/[^/?#]+/(?P<id>[^/?#]+)\.html'
+    _TEST = {
+        'url': 'https://www.scientology.tv/series/l-ron-hubbard-in-his-own-voice/life-as-an-author.html',
+        'info_dict': {
+            'id': 'life-as-an-author',
+            'ext': 'm3u8',
+            'title': 'Life as an Author | L. Ron Hubbard: In his Own Voice',
+            'description': 'The author on his real life adventures that thrilled millions, to his discoveries behind Dianetics.',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): the <title> delimiters are assumed from the title
+        # expected in _TEST; confirm against the live page.
+        title = self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title').strip()
+
+        # NOTE(review): assumes the summary sits in a "description" meta tag;
+        # confirm against the live page.
+        description = self._html_search_meta(
+            'description', webpage, 'description', fatal=True).strip()
+        # Replace each run of characters outside this whitelist with a space.
+        description = re.sub(r'[^a-zA-Z0-9.,_\s-]+', ' ', description)
+
+        # The stream location is a site-relative path, so it is prefixed
+        # with the scheme and host of the page URL.
+        # NOTE(review): assumes the path ends in .m3u8, as 'ext' in _TEST
+        # suggests; confirm against the live page.
+        media_path = self._search_regex(
+            r'(/[^\s"\'<>]+\.m3u8)', webpage, 'media path')
+        site_root = re.match(r'https?://[^/]+', url).group(0)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'url': site_root + media_path,
+        }