youtube-dl/youtube_dl/extractor/dailymotion.py

# coding: utf-8
from __future__ import unicode_literals

import re
import json
import itertools

from .common import InfoExtractor

from ..utils import (
    determine_ext,
    error_to_compat_str,
    ExtractorError,
    int_or_none,
    parse_iso8601,
    sanitized_Request,
    str_to_int,
    unescapeHTML,
    mimetype2ext,
)


class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base for the Dailymotion extractors defined in this module.

    Offers download helpers whose requests carry the cookies that switch the
    family filter off.
    """

    @staticmethod
    def _build_request(url):
        """Return a request for *url* with the family-filter cookies set to off."""
        no_ff_request = sanitized_Request(url)
        no_ff_request.add_header('Cookie', 'family_filter=off; ff=off')
        return no_ff_request

    def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
        """Call ``_download_webpage_handle`` on a family-filter-free request.

        Extra positional and keyword arguments are forwarded unchanged.
        """
        return self._download_webpage_handle(
            self._build_request(url), *args, **kwargs)

    def _download_webpage_no_ff(self, url, *args, **kwargs):
        """Call ``_download_webpage`` on a family-filter-free request.

        Extra positional and keyword arguments are forwarded unchanged.
        """
        return self._download_webpage(
            self._build_request(url), *args, **kwargs)


class DailymotionIE(DailymotionBaseInfoExtractor):
    _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
    IE_NAME = 'dailymotion'

    # Pairs of (key in the legacy embed page's ``info`` object, format_id
    # reported for that stream).  Only consulted by the old-player fallback
    # in _real_extract.
    _FORMATS = [
        ('stream_h264_ld_url', 'ld'),
        ('stream_h264_url', 'standard'),
        ('stream_h264_hq_url', 'hq'),
        ('stream_h264_hd_url', 'hd'),
        # Label mirrors the key like every other entry ('hd180' was a typo).
        ('stream_h264_hd1080_url', 'hd1080'),
    ]

    _TESTS = [{
        'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
        'md5': '074b95bdee76b9e3654137aee9c79dfe',
        'info_dict': {
            'id': 'x5kesuj',
            'ext': 'mp4',
            'title': 'Office Christmas Party Review –  Jason Bateman, Olivia Munn, T.J. Miller',
            'description': 'Office Christmas Party Review -  Jason Bateman, Olivia Munn, T.J. Miller',
            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
            'duration': 187,
            'timestamp': 1493651285,
            'upload_date': '20170501',
            'uploader': 'Deadline',
            'uploader_id': 'x1xm8ri',
            'age_limit': 0,
            'view_count': int,
        },
    }, {
        'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
        'md5': '2137c41a8e78554bb09225b8eb322406',
        'info_dict': {
            'id': 'x2iuewm',
            'ext': 'mp4',
            'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
            'description': 'Several come bundled with the Steam Controller.',
            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
            'duration': 74,
            'timestamp': 1425657362,
            'upload_date': '20150306',
            'uploader': 'IGN',
            'uploader_id': 'xijv66',
            'age_limit': 0,
            'view_count': int,
        },
        'skip': 'video gone',
    }, {
        # Vevo video
        'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
        'info_dict': {
            'title': 'Roar (Official)',
            'id': 'USUV71301934',
            'ext': 'mp4',
            'uploader': 'Katy Perry',
            'upload_date': '20130905',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'VEVO is only available in some countries',
    }, {
        # age-restricted video
        'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
        'md5': '0d667a7b9cebecc3c89ee93099c4159d',
        'info_dict': {
            'id': 'xyh2zz',
            'ext': 'mp4',
            'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
            'uploader': 'HotWaves1012',
            'age_limit': 18,
        },
        'skip': 'video gone',
    }, {
        # geo-restricted, player v5
        'url': 'http://www.dailymotion.com/video/xhza0o',
        'only_matching': True,
    }, {
        # with subtitles
        'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
        'only_matching': True,
    }, {
        'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
        'only_matching': True,
    }, {
        'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the HTML-unescaped URLs of Dailymotion players embedded in *webpage*.

        Matches ``<embed>``/``<iframe>`` ``src`` attributes and the ``value`` of
        the ``dmcloudUrlEmissionSelect`` input.  The list is empty when nothing
        matches.
        """
        # Look for embedded Dailymotion player
        embed_re = r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'
        # Two capture groups, so findall yields (quote, url) tuples.
        return [
            unescapeHTML(embed_url)
            for _quote, embed_url in re.findall(embed_re, webpage)]

    def _real_extract(self, url):
        """Extract metadata and formats for a single Dailymotion video.

        Three sources are tried in order:

        1. the player v5 JSON configuration embedded in the video page;
        2. a Vevo ``video_src`` link, which is handed over to the Vevo
           extractor;
        3. the ``var info`` object of the legacy embed page.

        Optional fields (uploader, thumbnail, duration, subtitles) are looked
        up tolerantly so that a missing or ``null`` value does not abort the
        extraction; ``title`` and the ``qualities`` map stay mandatory.

        Returns an info dict, or a ``url_result`` for Vevo videos.  Errors
        reported by the site are raised through ``_check_error``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(
            'https://www.dailymotion.com/video/%s' % video_id, video_id)

        age_limit = self._rta_search(webpage)

        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')

        view_count_str = self._search_regex(
            (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
             r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
            webpage, 'view count', fatal=False)
        if view_count_str:
            # The captured number may contain whitespace; drop it before parsing.
            view_count_str = re.sub(r'\s', '', view_count_str)
        view_count = str_to_int(view_count_str)
        comment_count = int_or_none(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
            webpage, 'comment count', default=None))

        player_v5 = self._search_regex(
            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
             r'buildPlayer\(({.+?})\);',
             r'var\s+config\s*=\s*({.+?});'],
            webpage, 'player v5', default=None)
        if player_v5:
            player = self._parse_json(player_v5, video_id)
            metadata = player['metadata']

            self._check_error(metadata)

            formats = []
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    type_ = media.get('type')
                    # Entries of this manifest type are deliberately skipped.
                    if type_ == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = mimetype2ext(type_) or determine_ext(media_url)
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            media_url, video_id, 'mp4', preference=-1,
                            m3u8_id='hls', fatal=False))
                    elif ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
                    else:
                        f = {
                            'url': media_url,
                            'format_id': 'http-%s' % quality,
                            'ext': ext,
                        }
                        # Direct URLs may encode the resolution, e.g. H264-1280x720.
                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
                        if m:
                            f.update({
                                'width': int(m.group('width')),
                                'height': int(m.group('height')),
                            })
                        formats.append(f)
            self._sort_formats(formats)

            title = metadata['title']
            duration = int_or_none(metadata.get('duration'))
            timestamp = int_or_none(metadata.get('created_time'))
            thumbnail = metadata.get('poster_url')
            # ``owner`` may be absent or null; ``or {}`` covers both, which a
            # .get() default alone does not.
            owner = metadata.get('owner') or {}
            uploader = owner.get('screenname')
            uploader_id = owner.get('id')

            subtitles = {}
            # ``subtitles`` may be null or an empty list instead of an object.
            subtitles_data = (metadata.get('subtitles') or {}).get('data')
            if subtitles_data and isinstance(subtitles_data, dict):
                for subtitle_lang, subtitle in subtitles_data.items():
                    subtitles[subtitle_lang] = [{
                        'ext': determine_ext(subtitle_url),
                        'url': subtitle_url,
                    } for subtitle_url in (subtitle.get('urls') or [])]

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'duration': duration,
                'timestamp': timestamp,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'age_limit': age_limit,
                'view_count': view_count,
                'comment_count': comment_count,
                'formats': formats,
                'subtitles': subtitles,
            }

        # vevo embed
        vevo_id = self._search_regex(
            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
            webpage, 'vevo embed', default=None)
        if vevo_id:
            return self.url_result('vevo:%s' % vevo_id, 'Vevo')

        # fallback old player
        embed_page = self._download_webpage_no_ff(
            'https://www.dailymotion.com/embed/video/%s' % video_id,
            video_id, 'Downloading embed page')

        timestamp = parse_iso8601(self._html_search_meta(
            'video:release_date', webpage, 'upload date'))

        info = self._parse_json(
            self._search_regex(
                r'var info = ({.*?}),$', embed_page,
                'video info', flags=re.MULTILINE),
            video_id)

        self._check_error(info)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        self._sort_formats(formats)

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
                'title')

        return {
            'id': video_id,
            'formats': formats,
            # Optional fields: use .get() so a missing key yields None instead
            # of aborting an otherwise successful extraction.
            'uploader': info.get('owner.screenname'),
            'timestamp': timestamp,
            'title': title,
            'description': description,
            'subtitles': video_subtitles,
            'thumbnail': info.get('thumbnail_url'),
            'age_limit': age_limit,
            'view_count': view_count,
            'duration': info.get('duration'),
        }

    def _check_error(self, info):
        """Raise if *info* carries an ``error`` object; otherwise do nothing.

        Error code ``DM007`` is routed to ``raise_geo_restricted``; any other
        error is raised as an expected ``ExtractorError`` whose message quotes
        the site's error text.
        """
        error = info.get('error')
        if not error:
            return
        # Prefer the title, but do not let a missing key hide the real error
        # behind a KeyError.  NOTE(review): ``message`` is assumed to be the
        # alternative text field of the error object -- confirm against the
        # API reference linked below.
        title = error.get('title') or error.get('message') or 'unknown error'
        # See https://developer.dailymotion.com/api#access-error
        if error.get('code') == 'DM007':
            self.raise_geo_restricted(msg=title)
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, title), expected=True)

    def _get_subtitles(self, video_id, webpage):
        """Fetch the subtitle listing for *video_id* from the Dailymotion API.

        Returns a dict mapping each language to a one-element list holding the
        subtitle URL with ``ext`` set to ``srt``.  An empty dict is returned,
        after a warning, when the listing cannot be downloaded or reports no
        subtitles.  *webpage* is accepted but not used here.
        """
        api_url = 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id
        try:
            raw_listing = self._download_webpage(api_url, video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        listing = json.loads(raw_listing)
        if not (listing['total'] > 0):
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}

        subtitles_by_lang = {}
        for entry in listing['list']:
            lang = entry['language']
            # A later entry for the same language replaces an earlier one.
            subtitles_by_lang[lang] = [{'url': entry['url'], 'ext': 'srt'}]
        return subtitles_by_lang


class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlist URLs; listing pages are walked in order."""

    IE_NAME = 'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
    _TESTS = [{
        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
        'info_dict': {
            'title': 'SPORT',
            'id': 'xv4bw_nqtv_sport',
        },
        'playlist_mincount': 20,
    }]

    def _extract_entries(self, id):
        """Yield a ``url_result`` for every distinct video on the listing pages.

        Pages are requested from ``_PAGE_TEMPLATE`` starting at page 1.  The
        walk ends when a request resolves to a URL that was already visited
        (a warning is reported) or when the page lacks the "more pages"
        indicator.
        """
        seen_video_ids = set()
        visited_urls = set()
        page_number = 1
        while True:
            requested_url = self._PAGE_TEMPLATE % (id, page_number)
            webpage, urlh = self._download_webpage_handle_no_ff(
                requested_url, id, 'Downloading page %s' % page_number)

            # Compare the final URL, not the requested one: a redirect back to
            # an earlier page would otherwise go unnoticed.
            final_url = urlh.geturl()
            if final_url in visited_urls:
                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
                    requested_url, final_url), id)
                return
            visited_urls.add(final_url)

            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
                if video_id in seen_video_ids:
                    continue
                yield self.url_result(
                    'http://www.dailymotion.com/video/%s' % video_id,
                    DailymotionIE.ie_key(), video_id)
                seen_video_ids.add(video_id)

            if not re.search(self._MORE_PAGES_INDICATOR, webpage):
                return
            page_number += 1

    def _real_extract(self, url):
        """Return a playlist result titled after the page's Open Graph title."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        playlist_title = self._og_search_title(webpage)
        # Entries are produced lazily by the generator.
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': self._extract_entries(playlist_id),
        }


class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for Dailymotion user pages; reuses the playlist page walker."""

    IE_NAME = 'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
    _TESTS = [{
        'url': 'https://www.dailymotion.com/user/nqtv',
        'info_dict': {
            'id': 'nqtv',
            'title': 'Rémi Gaillard',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'http://www.dailymotion.com/user/UnderProject',
        'info_dict': {
            'id': 'UnderProject',
            'title': 'UnderProject',
        },
        'playlist_mincount': 1800,
        'expected_warnings': [
            'Stopped at duplicated page',
        ],
        'skip': 'Takes too long time',
    }]

    def _real_extract(self, url):
        """Return a playlist result for the user named in *url*.

        The playlist title is the display name read from the ``nav-image``
        link on the user's page.
        """
        user = re.match(self._VALID_URL, url).group('user')
        profile_url = 'https://www.dailymotion.com/user/%s' % user
        webpage = self._download_webpage(profile_url, user)

        # The user name is escaped because it is interpolated into a regex.
        display_name_re = r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user)
        full_user = unescapeHTML(
            self._html_search_regex(display_name_re, webpage, 'user'))

        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': self._extract_entries(user),
        }


class DailymotionCloudIE(DailymotionBaseInfoExtractor):
    """Extractor for ``api.dmcloud.net`` embed URLs."""

    _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX

    _TESTS = [{
        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
        # Tested at FranceTvInfo_2
        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
        'only_matching': True,
    }, {
        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
        'only_matching': True,
    }]

    @classmethod
    def _extract_dmcloud_url(cls, webpage):
        """Return the first dmcloud embed URL found in *webpage*, or None.

        An ``<iframe>`` ``src`` is tried first, then the ``value`` of the
        ``dmcloudUrlEmissionSelect`` input.
        """
        embed_templates = (
            r'<iframe[^>]+src=[\'"](%s)[\'"]',
            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]',
        )
        for template in embed_templates:
            found = re.search(template % cls._VALID_EMBED_URL, webpage)
            if found:
                return found.group(1)
        return None

    def _real_extract(self, url):
        """Return id, direct MP4 URL, title and thumbnail for a dmcloud embed."""
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(url, video_id)

        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')

        raw_info = self._search_regex(
            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info')
        video_info = self._parse_json(raw_info, video_id)

        # TODO: parse ios_url, which is in fact a manifest
        result = {
            'id': video_id,
            'url': video_info['mp4_url'],
            'title': title,
        }
        result['thumbnail'] = video_info.get('thumbnail_url')
        return result
-												PEP8 applied

											
										
										
											2014-11-23 20:41:03 +01:00
+								# coding: utf-8
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								from __future__ import unicode_literals
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
+								import re
-												Dailymotion: fix the download of the video in the max quality (closes #986)

											
										
										
											2013-07-05 14:15:26 +02:00
+								import json
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
+								import itertools
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
 								from .common import InfoExtractor
-												[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow to reduce the code in YoutubeIE
once it is rewritten.

											
										
										
											2013-08-07 18:59:11 +02:00
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 12:24:42 +01:00
+								from ..utils import (
-												[daylimotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								    determine_ext,
-												Rename error_to_str to error_to_compat_str

											
										
										
											2015-12-20 02:00:39 +01:00
+								    error_to_compat_str,
-												Properly convert errors to strings

											
										
										
											2015-12-20 00:27:38 +01:00
+								    ExtractorError,
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 12:24:42 +01:00
+								    int_or_none,
-												[daylimotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								    parse_iso8601,
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 17:18:17 +01:00
+								    sanitized_Request,
-												[dailymotion] Extract view count (#1895)

											
										
										
											2013-12-06 13:36:36 +01:00
+								    str_to_int,
-												[dailymotion] Fix user playlist extraction

											
										
										
											2014-04-23 14:42:34 +02:00
+								    unescapeHTML,
-												use mimetype2ext to determine manifest ext in multiple extractors

											
										
										
											2016-07-06 10:11:46 +02:00
+								    mimetype2ext,
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
+								)
-												PEP8 applied

											
										
										
											2014-11-23 20:41:03 +01:00
-												[dailymotion] Disable the family filter in the playlists (fixes #1524)

											
										
										
											2013-09-29 12:44:02 +02:00
+								class DailymotionBaseInfoExtractor(InfoExtractor):
 								    @staticmethod
 								    def _build_request(url):
 								        """Build a request with the family filter disabled"""
-												Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8

											
										
										
											2015-11-21 17:18:17 +01:00
+								        request = sanitized_Request(url)
-												[dailymotion] Fix ff cookie and use it for embed page (Closes #5330)

											
										
										
											2015-03-31 16:55:21 +02:00
+								        request.add_header('Cookie', 'family_filter=off; ff=off')
-												[dailymotion] Disable the family filter in the playlists (fixes #1524)

											
										
										
											2013-09-29 12:44:02 +02:00
+								        return request
-												[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow to reduce the code in YoutubeIE
once it is rewritten.

											
										
										
											2013-08-07 18:59:11 +02:00
-												[dailymotion:playlist] Detect problematic redirection (fixes #6347)

											
										
										
											2015-07-24 15:29:44 +02:00
+								    def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
 								        request = self._build_request(url)
 								        return self._download_webpage_handle(request, *args, **kwargs)
-												[daylimotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								    def _download_webpage_no_ff(self, url, *args, **kwargs):
 								        request = self._build_request(url)
 								        return self._download_webpage(request, *args, **kwargs)
-												PEP8 applied

											
										
										
											2014-11-23 20:41:03 +01:00
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[daylimotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								class DailymotionIE(DailymotionBaseInfoExtractor):
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								    _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
-												[dailymotion] Correct test case

											
										
										
											2014-08-27 19:20:20 +02:00
+								    IE_NAME = 'dailymotion'
-												[dailymotion] Extract all the available formats (closes #1028)

											
										
										
											2013-10-23 17:33:38 +02:00
 								    _FORMATS = [
-												[dailymotion] Correct test case

											
										
										
											2014-08-27 19:20:20 +02:00
+								        ('stream_h264_ld_url', 'ld'),
 								        ('stream_h264_url', 'standard'),
 								        ('stream_h264_hq_url', 'hq'),
 								        ('stream_h264_hd_url', 'hd'),
 								        ('stream_h264_hd1080_url', 'hd180'),
-												[dailymotion] Extract all the available formats (closes #1028)

											
										
										
											2013-10-23 17:33:38 +02:00
+								    ]
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								    _TESTS = [{
 								        'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
 								        'md5': '074b95bdee76b9e3654137aee9c79dfe',
 								        'info_dict': {
 								            'id': 'x5kesuj',
 								            'ext': 'mp4',
 								            'title': 'Office Christmas Party Review –  Jason Bateman, Olivia Munn, T.J. Miller',
 								            'description': 'Office Christmas Party Review -  Jason Bateman, Olivia Munn, T.J. Miller',
 								            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
 								            'duration': 187,
 								            'timestamp': 1493651285,
 								            'upload_date': '20170501',
 								            'uploader': 'Deadline',
 								            'uploader_id': 'x1xm8ri',
 								            'age_limit': 0,
 								            'view_count': int,
-												[dailymotion] Add working test


											
										
										
											2017-05-01 20:37:23 +02:00
+								        },
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								    }, {
 								        'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
 								        'md5': '2137c41a8e78554bb09225b8eb322406',
 								        'info_dict': {
 								            'id': 'x2iuewm',
 								            'ext': 'mp4',
 								            'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
 								            'description': 'Several come bundled with the Steam Controller.',
 								            'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
 								            'duration': 74,
 								            'timestamp': 1425657362,
 								            'upload_date': '20150306',
 								            'uploader': 'IGN',
 								            'uploader_id': 'xijv66',
 								            'age_limit': 0,
 								            'view_count': int,
-												[dailymotion] Detect vevo videos (fixes #1532)

All videos from the Vevo user are just embeds of videos from vevo.com

											
										
										
											2013-10-01 15:05:41 +02:00
+								        },
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'skip': 'video gone',
 								    }, {
-												[dailymotion] Detect vevo videos (fixes #1532)

All videos from the Vevo user are just embeds of videos from vevo.com

											
										
										
											2013-10-01 15:05:41 +02:00
+								        # Vevo video
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
 								        'info_dict': {
 								            'title': 'Roar (Official)',
 								            'id': 'USUV71301934',
 								            'ext': 'mp4',
 								            'uploader': 'Katy Perry',
 								            'upload_date': '20130905',
-												[dailymotion] Detect vevo videos (fixes #1532)

All videos from the Vevo user are just embeds of videos from vevo.com

											
										
										
											2013-10-01 15:05:41 +02:00
+								        },
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'params': {
 								            'skip_download': True,
 								        },
 								        'skip': 'VEVO is only available in some countries',
 								    }, {
-												[dailymotion] Fix support for age-restricted videos (Fixes #1688)

											
										
										
											2013-10-31 00:20:49 +01:00
+								        # age-restricted video
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
 								        'md5': '0d667a7b9cebecc3c89ee93099c4159d',
 								        'info_dict': {
 								            'id': 'xyh2zz',
 								            'ext': 'mp4',
 								            'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
 								            'uploader': 'HotWaves1012',
 								            'age_limit': 18,
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
+								        },
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'skip': 'video gone',
 								    }, {
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
+								        # geo-restricted, player v5
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'url': 'http://www.dailymotion.com/video/xhza0o',
 								        'only_matching': True,
 								    }, {
-												[dailymotion] Add subtitles test URL for reference

											
										
										
											2015-12-10 16:54:48 +01:00
+								        # with subtitles
-												[dailymotion] Extend _VALID_URL (closes #13079)

											
										
										
											2017-05-14 04:55:40 +02:00
+								        'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
 								        'only_matching': True,
 								    }, {
 								        'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
 								        'only_matching': True,
 								    }, {
 								        'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
 								        'only_matching': True,
 								    }]
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 17:37:54 +02:00
+								    @staticmethod
 								    def _extract_urls(webpage):
 								        # Return a list of Dailymotion player URLs found in `webpage`.
 								        # The pattern accepts the src attribute of <embed>/<iframe> tags and
 								        # the value attribute of an <input id="dmcloudUrlEmissionSelect">.
 								        # It has two capture groups (opening quote, then the URL), so each
 								        # findall hit is a (quote, url) tuple and m[1] selects the URL,
 								        # which is HTML-unescaped before being returned.
 								        # Look for embedded Dailymotion player
 								        matches = re.findall(
 								            r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
 								        return list(map(lambda m: unescapeHTML(m[1]), matches))
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
+								    def _real_extract(self, url):
-												[dailymotion] Alternative title search (Fixes #3882)

											
										
										
											2014-10-05 21:59:53 +02:00
+								        video_id = self._match_id(url)
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        webpage = self._download_webpage_no_ff(
 								            'https://www.dailymotion.com/video/%s' % video_id, video_id)
 								        age_limit = self._rta_search(webpage)
 								        description = self._og_search_description(webpage) or self._html_search_meta(
 								            'description', webpage, 'description')
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[dailymotion] Fix view count extraction

Fix view count parsing when the thousands separator is whitespace, e.g. '101 101'

											
										
										
											2016-02-16 10:45:53 +01:00
+								        view_count_str = self._search_regex(
 								            (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
 								             r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
 								            webpage, 'view count', fatal=False)
 								        if view_count_str:
 								            view_count_str = re.sub(r'\s', '', view_count_str)
 								        view_count = str_to_int(view_count_str)
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        comment_count = int_or_none(self._search_regex(
 								            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
-												[dailymotion] Make comment count optional (closes #12209)
Not served anymore

											
										
										
											2017-02-22 15:49:30 +01:00
+								            webpage, 'comment count', default=None))
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
 								        player_v5 = self._search_regex(
-												[dailymotion] Restrict player v5 regex (Closes #7826)

											
										
										
											2015-12-10 16:27:47 +01:00
+								            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
 								             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
-												[dailymotion] Fix extraction and update _TESTS

Closes #10901

Seems all videos use player V5 syntax now

											
										
										
											2016-10-12 15:45:49 +02:00
+								             r'buildPlayer\(({.+?})\);',
 								             r'var\s+config\s*=\s*({.+?});'],
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								            webpage, 'player v5', default=None)
 								        if player_v5:
 								            player = self._parse_json(player_v5, video_id)
 								            metadata = player['metadata']
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
 								            self._check_error(metadata)
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								            formats = []
 								            for quality, media_list in metadata['qualities'].items():
 								                for media in media_list:
 								                    media_url = media.get('url')
 								                    if not media_url:
 								                        continue
 								                    type_ = media.get('type')
 								                    if type_ == 'application/vnd.lumberjack.manifest':
 								                        continue
-												use mimetype2ext to determine manifest ext in multiple extractors

											
										
										
											2016-07-06 10:11:46 +02:00
+								                    ext = mimetype2ext(type_) or determine_ext(media_url)
 								                    if ext == 'm3u8':
-												Simplify formats accumulation for f4m/m3u8/smil formats

Now all _extract_*_formats routines return a list

											
										
										
											2015-12-28 19:58:24 +01:00
+								                        formats.extend(self._extract_m3u8_formats(
-												[dailymotion] Prefer direct links (Closes #8156)

											
										
										
											2016-01-12 18:23:39 +01:00
+								                            media_url, video_id, 'mp4', preference=-1,
 								                            m3u8_id='hls', fatal=False))
-												use mimetype2ext to determine manifest ext in multiple extractors

											
										
										
											2016-07-06 10:11:46 +02:00
+								                    elif ext == 'f4m':
-												Simplify formats accumulation for f4m/m3u8/smil formats

Now all _extract_*_formats routines return a list

											
										
										
											2015-12-28 19:58:24 +01:00
+								                        formats.extend(self._extract_f4m_formats(
 								                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								                    else:
 								                        f = {
 								                            'url': media_url,
-												[dailymotion] Prefer direct links (Closes #8156)

											
										
										
											2016-01-12 18:23:39 +01:00
+								                            'format_id': 'http-%s' % quality,
-												use mimetype2ext to determine manifest ext in multiple extractors

											
										
										
											2016-07-06 10:11:46 +02:00
+								                            'ext': ext,
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								                        }
 								                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
 								                        if m:
 								                            f.update({
 								                                'width': int(m.group('width')),
 								                                'height': int(m.group('height')),
 								                            })
 								                        formats.append(f)
 								            self._sort_formats(formats)
 								            title = metadata['title']
 								            duration = int_or_none(metadata.get('duration'))
 								            timestamp = int_or_none(metadata.get('created_time'))
 								            thumbnail = metadata.get('poster_url')
 								            uploader = metadata.get('owner', {}).get('screenname')
 								            uploader_id = metadata.get('owner', {}).get('id')
 								            subtitles = {}
-												[dailymotion] Fix subtitles extraction

											
										
										
											2015-12-10 16:29:07 +01:00
+								            subtitles_data = metadata.get('subtitles', {}).get('data', {})
 								            if subtitles_data and isinstance(subtitles_data, dict):
 								                for subtitle_lang, subtitle in subtitles_data.items():
 								                    subtitles[subtitle_lang] = [{
 								                        'ext': determine_ext(subtitle_url),
 								                        'url': subtitle_url,
 								                    } for subtitle_url in subtitle.get('urls', [])]
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
 								            return {
 								                'id': video_id,
 								                'title': title,
 								                'description': description,
 								                'thumbnail': thumbnail,
 								                'duration': duration,
 								                'timestamp': timestamp,
 								                'uploader': uploader,
 								                'uploader_id': uploader_id,
 								                'age_limit': age_limit,
 								                'view_count': view_count,
 								                'comment_count': comment_count,
 								                'formats': formats,
 								                'subtitles': subtitles,
 								            }
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        # vevo embed
 								        vevo_id = self._search_regex(
-												[dailymotion] Fix extraction of vevo videos (fixes #4168)

											
										
										
											2014-11-12 23:32:27 +01:00
+								            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								            webpage, 'vevo embed', default=None)
 								        if vevo_id:
 								            return self.url_result('vevo:%s' % vevo_id, 'Vevo')
-												[dailymotion] Detect vevo videos (fixes #1532)

All videos from the Vevo user are just embeds of videos from vevo.com

											
										
										
											2013-10-01 15:05:41 +02:00
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        # fallback old player
 								        embed_page = self._download_webpage_no_ff(
 								            'https://www.dailymotion.com/embed/video/%s' % video_id,
 								            video_id, 'Downloading embed page')
 								        timestamp = parse_iso8601(self._html_search_meta(
 								            'video:release_date', webpage, 'upload date'))
 								        info = self._parse_json(
 								            self._search_regex(
 								                r'var info = ({.*?}),$', embed_page,
 								                'video info', flags=re.MULTILINE),
 								            video_id)
-												Move DailyMotion into its own file

											
										
										
											2013-06-23 20:09:47 +02:00
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
+								        self._check_error(info)
-												Dailymotion: fix the download of the video in the max quality (closes #986)

											
										
										
											2013-07-05 14:15:26 +02:00
-												[dailymotion] Extract all the available formats (closes #1028)

											
										
										
											2013-10-23 17:33:38 +02:00
+								        formats = []
 								        for (key, format_id) in self._FORMATS:
 								            video_url = info.get(key)
 								            if video_url is not None:
 								                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
 								                if m_size is not None:
-												[dailymotion] Convert width and height fields from strings to integers

											
										
										
											2014-03-04 22:24:38 +01:00
+								                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
-												[dailymotion] Extract all the available formats (closes #1028)

											
										
										
											2013-10-23 17:33:38 +02:00
+								                else:
 								                    width, height = None, None
 								                formats.append({
 								                    'url': video_url,
 								                    'ext': 'mp4',
 								                    'format_id': format_id,
 								                    'width': width,
 								                    'height': height,
 								                })
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        self._sort_formats(formats)
-												Dailymotion: fix the download of the video in the max quality (closes #986)

											
										
										
											2013-07-05 14:15:26 +02:00
-												[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow us to reduce the code in YoutubeIE
once it is rewritten.

											
										
										
											2013-08-07 18:59:11 +02:00
+								        # subtitles
-												[subtitles] refactor to support websites with subtitle information in the
webpage.

I added the parameter webpage, so now it's similar to the way automatic
captions are handled. This is an improvement needed for websites like
TED.

											
										
										
											2013-11-02 18:01:05 +01:00
+								        video_subtitles = self.extract_subtitles(video_id, webpage)
-												[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow us to reduce the code in YoutubeIE
once it is rewritten.

											
										
										
											2013-08-07 18:59:11 +02:00
-												[dailymotion] Alternative title search (Fixes #3882)

											
										
										
											2014-10-05 21:59:53 +02:00
+								        title = self._og_search_title(webpage, default=None)
 								        if title is None:
 								            title = self._html_search_regex(
 								                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
 								                'title')
-												[dailymotion] Extract view count (#1895)

											
										
										
											2013-12-06 13:36:36 +01:00
-												[dailymotion] Fix support for age-restricted videos (Fixes #1688)

											
										
										
											2013-10-31 00:20:49 +01:00
+								        return {
-												[dailymotion] Alternative title search (Fixes #3882)

											
										
										
											2014-10-05 21:59:53 +02:00
+								            'id': video_id,
-												[dailymotion] Extract all the available formats (closes #1028)

											
										
										
											2013-10-23 17:33:38 +02:00
+								            'formats': formats,
-												[Dailymotion] fix uploader name (fixes #3153)

											
										
										
											2014-06-25 17:44:19 +02:00
+								            'uploader': info['owner.screenname'],
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								            'timestamp': timestamp,
-												[dailymotion] Alternative title search (Fixes #3882)

											
										
										
											2014-10-05 21:59:53 +02:00
+								            'title': title,
-												[dailymotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								            'description': description,
-												[dailymotion] Alternative title search (Fixes #3882)

											
										
										
											2014-10-05 21:59:53 +02:00
+								            'subtitles': video_subtitles,
-												[dailymotion] Fix support for age-restricted videos (Fixes #1688)

											
										
										
											2013-10-31 00:20:49 +01:00
+								            'thumbnail': info['thumbnail_url'],
 								            'age_limit': age_limit,
-												[dailymotion] Extract view count (#1895)

											
										
										
											2013-12-06 13:36:36 +01:00
+								            'view_count': view_count,
-												[dailymotion] Extract duration (closes #6221)

											
										
										
											2015-07-14 08:33:55 +02:00
+								            'duration': info['duration']
-												[dailymotion] Fix support for age-restricted videos (Fixes #1688)

											
										
										
											2013-10-31 00:20:49 +01:00
+								        }
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
+								    def _check_error(self, info):
-												[dailymotion] Raise GeoRestrictedError

											
										
										
											2017-02-26 10:50:57 +01:00
+								        error = info.get('error')
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
+								        if info.get('error') is not None:
-												[dailymotion] Raise GeoRestrictedError

											
										
										
											2017-02-26 10:50:57 +01:00
+								            title = error['title']
 								            # See https://developer.dailymotion.com/api#access-error
 								            if error.get('code') == 'DM007':
 								                self.raise_geo_restricted(msg=title)
-												[dailymotion] Error spelling

											
										
										
											2015-10-17 21:00:37 +02:00
+								            raise ExtractorError(
-												[dailymotion] Raise GeoRestrictedError

											
										
										
											2017-02-26 10:50:57 +01:00
+								                '%s said: %s' % (self.IE_NAME, title), expected=True)
-												[dailymotion] Report errors from player v5

											
										
										
											2015-10-17 19:26:30 +02:00
-												[dailymotion] Convert to new subtitles system

											
										
										
											2015-02-16 21:28:06 +01:00
+								    def _get_subtitles(self, video_id, webpage):
-												[subtitles] made inheritance hierarchy flat as requested

											
										
										
											2013-09-11 15:21:09 +02:00
+								        try:
-												[subtitles] Use self._download_webpage for extracting the subtitles

It raises ExtractorError for the same exceptions we have to catch.

											
										
										
											2013-09-11 16:24:47 +02:00
+								            sub_list = self._download_webpage(
 								                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
 								                video_id, note=False)
 								        except ExtractorError as err:
-												Rename error_to_str to error_to_compat_str

											
										
										
											2015-12-20 02:00:39 +01:00
+								            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
-												[subtitles] made inheritance hierarchy flat as requested

											
										
										
											2013-09-11 15:21:09 +02:00
+								            return {}
 								        info = json.loads(sub_list)
 								        if (info['total'] > 0):
-												[dailymotion] Convert to new subtitles system

											
										
										
											2015-02-16 21:28:06 +01:00
+								            sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
-												[subtitles] made inheritance hierarchy flat as requested

											
										
										
											2013-09-11 15:21:09 +02:00
+								            return sub_lang_list
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								        self._downloader.report_warning('video doesn\'t have subtitles')
-												[subtitles] made inheritance hierarchy flat as requested

											
										
										
											2013-09-11 15:21:09 +02:00
+								        return {}
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
-												[dailymotion] Disable the family filter in the playlists (fixes #1524)

											
										
										
											2013-09-29 12:44:02 +02:00
+								class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								    IE_NAME = 'dailymotion:playlist'
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
+								    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
-												[dailymotion] Fix playlist+user

											
										
										
											2014-04-04 02:04:16 +02:00
+								    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
+								    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								    _TESTS = [{
 								        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
 								        'info_dict': {
 								            'title': 'SPORT',
-												Extend various playlist tests

											
										
										
											2015-02-18 00:49:10 +01:00
+								            'id': 'xv4bw_nqtv_sport',
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								        },
 								        'playlist_mincount': 20,
 								    }]
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
+								    def _extract_entries(self, id):
-												[dailymotion:playlist] Use an iterator for the entries

So that using '--playlist-end' only downloads the required pages (reported in #2175).

											
										
										
											2015-08-02 15:19:57 +02:00
+								        video_ids = set()
-												[dailymotion:playlist] Detect problematic redirection (fixes #6347)

											
										
										
											2015-07-24 15:29:44 +02:00
+								        processed_urls = set()
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
+								        for pagenum in itertools.count(1):
-												[dailymotion:playlist] Detect problematic redirection (fixes #6347)

											
										
										
											2015-07-24 15:29:44 +02:00
+								            page_url = self._PAGE_TEMPLATE % (id, pagenum)
 								            webpage, urlh = self._download_webpage_handle_no_ff(
 								                page_url, id, 'Downloading page %s' % pagenum)
 								            if urlh.geturl() in processed_urls:
 								                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
 								                    page_url, urlh.geturl()), id)
 								                break
 								            processed_urls.add(urlh.geturl())
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
-												[dailymotion:playlist] Use an iterator for the entries

So that using '--playlist-end' only downloads the required pages (reported in #2175).

											
										
										
											2015-08-02 15:19:57 +02:00
+								            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
 								                if video_id not in video_ids:
-												[dailymotion:playlist] Carry long line

											
										
										
											2016-07-29 17:47:34 +02:00
+								                    yield self.url_result(
 								                        'http://www.dailymotion.com/video/%s' % video_id,
 								                        DailymotionIE.ie_key(), video_id)
-												[dailymotion:playlist] Use an iterator for the entries

So that using '--playlist-end' only downloads the required pages (reported in #2175).

											
										
										
											2015-08-02 15:19:57 +02:00
+								                    video_ids.add(video_id)
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
-												[dailymotion] Fix playlist+user

											
										
										
											2014-04-04 02:04:16 +02:00
+								            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
-												[dailymotion] Add an extractor for Dailymotion playlists

											
										
										
											2013-07-29 12:07:38 +02:00
+								                break
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
 								    def _real_extract(self, url):
 								        mobj = re.match(self._VALID_URL, url)
 								        playlist_id = mobj.group('id')
 								        webpage = self._download_webpage(url, playlist_id)
-												[dailymotion:playlist] Fix title

											
										
										
											2014-04-11 02:16:46 +02:00
+								        return {
 								            '_type': 'playlist',
 								            'id': playlist_id,
 								            'title': self._og_search_title(webpage),
 								            'entries': self._extract_entries(playlist_id),
 								        }
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
 								class DailymotionUserIE(DailymotionPlaylistIE):
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								    IE_NAME = 'dailymotion:user'
-												[dailymotion] Extend _VALID_URL

											
										
										
											2016-01-19 16:20:14 +01:00
+								    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
+								    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								    _TESTS = [{
 								        'url': 'https://www.dailymotion.com/user/nqtv',
 								        'info_dict': {
 								            'id': 'nqtv',
 								            'title': 'Rémi Gaillard',
 								        },
 								        'playlist_mincount': 100,
-												[dailymotion:playlist] Detect problematic redirection (fixes #6347)

											
										
										
											2015-07-24 15:29:44 +02:00
+								    }, {
 								        'url': 'http://www.dailymotion.com/user/UnderProject',
 								        'info_dict': {
 								            'id': 'UnderProject',
 								            'title': 'UnderProject',
 								        },
 								        'playlist_mincount': 1800,
 								        'expected_warnings': [
 								            'Stopped at duplicated page',
 								        ],
 								        'skip': 'Takes too long time',
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								    }]
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
 								    def _real_extract(self, url):
 								        mobj = re.match(self._VALID_URL, url)
 								        user = mobj.group('user')
-												[dailymotion:user] Process user home as user (Closes #5823)

											
										
										
											2015-05-26 18:12:26 +02:00
+								        webpage = self._download_webpage(
 								            'https://www.dailymotion.com/user/%s' % user, user)
-												[dailymotion] Fix user playlist extraction

											
										
										
											2014-04-23 14:42:34 +02:00
+								        full_user = unescapeHTML(self._html_search_regex(
 								            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											2014-08-28 00:58:24 +02:00
+								            webpage, 'user'))
-												[dailymotion] Add an extractor for users (closes #1476)

											
										
										
											2013-09-21 12:45:53 +02:00
 								        return {
 								            '_type': 'playlist',
 								            'id': user,
 								            'title': full_user,
 								            'entries': self._extract_entries(user),
 								        }
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
 								class DailymotionCloudIE(DailymotionBaseInfoExtractor):
-												Add support for https for rest of the exctractors.

											
										
										
											2016-09-08 08:52:22 +02:00
+								    _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
-												[dailymotion:cloud] Extend _VALID_URL (Closes #6145)

											
										
										
											2015-07-03 18:47:52 +02:00
+								    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
 								    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
-												[dailymotion:cloud] Extend _VALID_URL (Closes #6145)

											
										
										
											2015-07-03 18:47:52 +02:00
+								    _TESTS = [{
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
+								        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
 								        # Tested at FranceTvInfo_2
 								        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
 								        'only_matching': True,
-												[dailymotion:cloud] Extend _VALID_URL (Closes #6145)

											
										
										
											2015-07-03 18:47:52 +02:00
+								    }, {
 								        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
 								        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
 								        'only_matching': True,
 								    }]
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
 								    @classmethod
-												[dailymotion:cloud] Use idiomatic name for classmethod's first argument

											
										
										
											2016-02-14 08:44:23 +01:00
+								    def _extract_dmcloud_url(cls, webpage):
 								        mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage)
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
+								        if mobj:
 								            return mobj.group(1)
-												[dailymotion:cloud] Extend _VALID_URL (Closes #6145)

											
										
										
											2015-07-03 18:47:52 +02:00
+								        mobj = re.search(
-												[dailymotion:cloud] Use idiomatic name for classmethod's first argument

											
										
										
											2016-02-14 08:44:23 +01:00
+								            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL,
-												[dailymotion:cloud] Extend _VALID_URL (Closes #6145)

											
										
										
											2015-07-03 18:47:52 +02:00
+								            webpage)
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
+								        if mobj:
 								            return mobj.group(1)
 								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
-												[daylimotion] Adapt to player v5 and modernize (Closes #6151, closes #6250)

											
										
										
											2015-07-23 18:04:47 +02:00
+								        webpage = self._download_webpage_no_ff(url, video_id)
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											2015-06-21 15:30:34 +02:00
 								        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
 								        video_info = self._parse_json(self._search_regex(
 								            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
 								        # TODO: parse ios_url, which is in fact a manifest
 								        video_url = video_info['mp4_url']
 								        return {
 								            'id': video_id,
 								            'url': video_url,
 								            'title': title,
 								            'thumbnail': video_info.get('thumbnail_url'),
 								        }