#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import import base64 import datetime import itertools import netrc import os import re import socket import time import email.utils import xml.etree.ElementTree import random import math import operator import hashlib import binascii import urllib from .utils import * from .extractor.common import InfoExtractor, SearchInfoExtractor from .extractor.dailymotion import DailymotionIE from .extractor.metacafe import MetacafeIE from .extractor.statigram import StatigramIE from .extractor.photobucket import PhotobucketIE from .extractor.vimeo import VimeoIE from .extractor.yahoo import YahooIE from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeUserIE, YoutubeChannelIE class ArteTvIE(InfoExtractor): """arte.tv information extractor.""" _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' def fetch_webpage(self, url): request = compat_urllib_request.Request(url) try: self.report_download_webpage(url) webpage = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err)) except ValueError as err: raise ExtractorError(u'Invalid URL: %s' % url) return webpage def grep_webpage(self, url, regex, regexFlags, matchTuples): page = self.fetch_webpage(url) mobj = re.search(regex, page, regexFlags) info = {} if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) for (i, key, err) in matchTuples: if mobj.group(i) is None: raise ExtractorError(err) else: info[key] = mobj.group(i) return info def extractLiveStream(self, url): video_lang = url.split('/')[-4] info = self.grep_webpage( url, r'src="(.*?/videothek_js.*?\.js)', 0, [ (1, 'url', u'Invalid URL: %s' % url) ] ) http_host = url.split('/')[2] next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) info = self.grep_webpage( next_url, r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + '(http://.*?\.swf).*?' + '(rtmp://.*?)\'', re.DOTALL, [ (1, 'path', u'could not extract video path: %s' % url), (2, 'player', u'could not extract video player: %s' % url), (3, 'url', u'could not extract video url: %s' % url) ] ) video_url = u'%s/%s' % (info.get('url'), info.get('path')) def extractPlus7Stream(self, url): video_lang = url.split('/')[-3] info = self.grep_webpage( url, r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', 0, [ (1, 'url', u'Invalid URL: %s' % url) ] ) next_url = compat_urllib_parse.unquote(info.get('url')) info = self.grep_webpage( next_url, r'