youtube-dl/youtube_dl/extractor/lynda.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    urlencode_postdata,
)


class LyndaBaseIE(InfoExtractor):
    _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
    _PASSWORD_URL = 'https://www.lynda.com/signin/password'
    _USER_URL = 'https://www.lynda.com/signin/user'
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
    _NETRC_MACHINE = 'lynda'

    def _real_initialize(self):
        """Run the lynda.com sign-in flow via _login().

        _login() returns immediately when no username is configured, so
        this does nothing for anonymous use.
        """
        self._login()

    @staticmethod
    def _check_error(json_string, key_or_keys):
        """Raise a login error if the response carries an error message.

        json_string -- despite its name this is the parsed response object;
                       it is only accessed through ``.get(key)``.
        key_or_keys -- a single key (compat_str) or an iterable of keys,
                       inspected in order.

        Raises ExtractorError (expected=True) for the first key that holds
        a truthy value; returns None otherwise.
        """
        if isinstance(key_or_keys, compat_str):
            candidates = [key_or_keys]
        else:
            candidates = key_or_keys
        for candidate in candidates:
            message = json_string.get(candidate)
            if not message:
                continue
            raise ExtractorError('Unable to login: %s' % message, expected=True)

    def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
        """Submit one form of the sign-in flow and return its JSON response.

        form_html           -- HTML of the form; supplies the action URL and
                               the hidden inputs.
        fallback_action_url -- URL used when the form has no action attribute.
        extra_form_data     -- fields merged over the hidden inputs.
        note                -- progress message for the request.
        referrer_url        -- value sent in the Referer header.

        Returns a (response, action_url) tuple. Raises ExtractorError through
        _check_error when the response has a truthy 'email', 'password' or
        'ErrorMessage' entry.
        """
        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
            'post url', default=fallback_action_url, group='url')

        # Form actions that are not absolute are resolved against the
        # sign-in page URL
        is_absolute = post_url.startswith('http')
        if not is_absolute:
            post_url = compat_urlparse.urljoin(self._SIGNIN_URL, post_url)

        payload = self._hidden_inputs(form_html)
        payload.update(extra_form_data)

        request_headers = {
            'Referer': referrer_url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        # 418 and 500 are declared as expected statuses; presumably the
        # error details arrive in the body of such responses -- TODO confirm
        result = self._download_json(
            post_url, None, note,
            data=urlencode_postdata(payload), headers=request_headers,
            expected_status=(418, 500))

        self._check_error(result, ('email', 'password', 'ErrorMessage'))
        return result, post_url

    def _login(self):
        """Sign in to lynda.com with the configured credentials.

        Does nothing when no username is available or when the sign-in page
        already shows a logged-in session. Otherwise the email and then the
        password are submitted through _login_step, which raises
        ExtractorError on a reported login error.
        """
        username, password = self._get_login_info()
        if username is None:
            # No credentials configured: nothing to do
            return

        # Step 1: fetch the sign-in page
        landing_page = self._download_webpage(
            self._SIGNIN_URL, None, 'Downloading signin page')

        # Any one of these markers means the session is already logged in
        logged_in_markers = (
            r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')
        for marker in logged_in_markers:
            if re.search(marker, landing_page):
                return

        # Step 2: submit the email address
        email_form = self._search_regex(
            r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
            landing_page, 'signin form')
        email_response, email_action_url = self._login_step(
            email_form, self._PASSWORD_URL, {'email': username},
            'Submitting email', self._SIGNIN_URL)

        # Step 3: submit the password; the password form HTML is delivered
        # in the 'body' entry of the previous response
        password_form = email_response['body']
        credentials = {'email': username, 'password': password}
        self._login_step(
            password_form, self._USER_URL, credentials,
            'Submitting password', email_action_url)


class LyndaIE(LyndaBaseIE):
    IE_NAME = 'lynda'
    IE_DESC = 'lynda.com videos'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?(?:lynda\.com|educourse\.ga)/
                        (?:
                            (?:[^/]+/){2,3}(?P<course_id>\d+)|
                            player/embed
                        )/
                        (?P<id>\d+)
                    '''

    _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

    _TESTS = [{
        'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        # md5 is unstable
        'info_dict': {
            'id': '114408',
            'ext': 'mp4',
            'title': 'Using the exercise files',
            'duration': 68
        }
    }, {
        'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
        'only_matching': True,
    }, {
        'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        'only_matching': True,
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
        'only_matching': True,
    }]

    def _raise_unavailable(self, video_id):
        """Report through raise_login_required that video_id is members-only."""
        message = 'Video %s is only available for members' % video_id
        self.raise_login_required(message)

    def _real_extract(self, url):
        """Extract a single lynda.com video.

        The player JSON endpoint is queried first. When it yields nothing,
        the per-course "play" endpoint (formats) and the conviva endpoint
        (metadata) are used as a fallback.

        Returns an info dict with 'id', 'title', 'duration' and 'formats';
        the primary path adds 'subtitles', the fallback path adds
        'description', 'release_year' and 'creator'.

        Raises ExtractorError (expected) when the player JSON contains a
        'Status' entry, and reports a login requirement through
        _raise_unavailable when the video is not accessible.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # None for player/embed URLs, which carry no course id
        course_id = mobj.group('course_id')

        query = {
            'videoId': video_id,
            'type': 'video',
        }

        video = self._download_json(
            'https://www.lynda.com/ajax/player', video_id,
            'Downloading video JSON', fatal=False, query=query)

        # Fallback scenario
        if not video:
            # NOTE(review): for player/embed URLs course_id is None, so the
            # URL and query below contain the literal 'None' -- confirm
            # whether the endpoint tolerates that or whether this path
            # should bail out early.
            query['courseId'] = course_id

            play = self._download_json(
                'https://www.lynda.com/ajax/course/%s/%s/play'
                % (course_id, video_id), video_id, 'Downloading play JSON')

            if not play:
                self._raise_unavailable(video_id)

            formats = []
            for formats_dict in play:
                # Ignore malformed entries instead of failing on .get()
                if not isinstance(formats_dict, dict):
                    continue
                urls = formats_dict.get('urls')
                if not isinstance(urls, dict):
                    continue
                cdn = formats_dict.get('name')
                for format_id, format_url in urls.items():
                    if not format_url:
                        continue
                    formats.append({
                        'url': format_url,
                        'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
                        'height': int_or_none(format_id),
                    })
            self._sort_formats(formats)

            conviva = self._download_json(
                'https://www.lynda.com/ajax/player/conviva', video_id,
                'Downloading conviva JSON', query=query)

            return {
                'id': video_id,
                'title': conviva['VideoTitle'],
                'description': conviva.get('VideoDescription'),
                'release_year': int_or_none(conviva.get('ReleaseYear')),
                'duration': int_or_none(conviva.get('Duration')),
                'creator': conviva.get('Author'),
                'formats': formats,
            }

        if 'Status' in video:
            # 'Message' may be absent; fall back to the status value rather
            # than masking the real error with a KeyError
            raise ExtractorError(
                'lynda returned error: %s'
                % (video.get('Message') or video['Status']), expected=True)

        if video.get('HasAccess') is False:
            self._raise_unavailable(video_id)

        video_id = compat_str(video.get('ID') or video_id)
        duration = int_or_none(video.get('DurationInSeconds'))
        title = video['Title']

        formats = []

        fmts = video.get('Formats')
        if fmts:
            formats.extend([{
                'url': f['Url'],
                'ext': f.get('Extension'),
                'width': int_or_none(f.get('Width')),
                'height': int_or_none(f.get('Height')),
                'filesize': int_or_none(f.get('FileSize')),
                'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
            } for f in fmts if f.get('Url')])

        prioritized_streams = video.get('PrioritizedStreams')
        if prioritized_streams:
            for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
                # A stream without a {format_id: url} mapping has nothing
                # to offer
                if not isinstance(prioritized_stream, dict):
                    continue
                # Entries with an empty URL are dropped so that they never
                # reach _check_formats
                formats.extend([{
                    'url': video_url,
                    'height': int_or_none(format_id),
                    'format_id': '%s-%s' % (prioritized_stream_id, format_id),
                } for format_id, video_url in prioritized_stream.items()
                    if video_url])

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        subtitles = self.extract_subtitles(video_id)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'subtitles': subtitles,
            'formats': formats
        }

    def _fix_subtitles(self, subs):
        srt = ''
        seq_counter = 0
        for pos in range(0, len(subs) - 1):
            seq_current = subs[pos]
            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
            if m_current is None:
                continue
            seq_next = subs[pos + 1]
            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
            if m_next is None:
                continue
            appear_time = m_current.group('timecode')
            disappear_time = m_next.group('timecode')
            text = seq_current['Caption'].strip()
            if text:
                seq_counter += 1
                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
        if srt:
            return srt

    def _get_subtitles(self, video_id):
        """Download the transcript of video_id and return it as subtitles.

        Returns {'en': [{'ext': 'srt', 'data': <srt text>}]}, or an empty
        dict when the transcript cannot be downloaded, is not a list of
        entries, or yields no cue.
        """
        url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
        # Subtitles are optional: a failed transcript request must not abort
        # the extraction of the video itself, hence fatal=False. The progress
        # note stays suppressed (False) as before.
        subs = self._download_json(url, video_id, False, fatal=False)
        # _fix_subtitles indexes the transcript positionally, so anything
        # other than a list (failed download, error object) is unusable
        if not isinstance(subs, list):
            return {}
        fixed_subs = self._fix_subtitles(subs)
        if fixed_subs:
            return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
        return {}


class LyndaCourseIE(LyndaBaseIE):
    IE_NAME = 'lynda:course'
    IE_DESC = 'lynda.com online courses'

    # Course link equals to welcome/introduction video link of same course
    # We will recognize it as course link
    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'

    _TESTS = [{
        'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
        'only_matching': True,
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a lynda.com course as a playlist of its videos.

        The course JSON is requested first. When it cannot be obtained, the
        course webpage is scraped for data-video-id attributes instead.
        Videos whose 'HasAccess' is False are left out of the playlist and
        reported in a single warning.

        Raises ExtractorError (expected) when the course JSON reports the
        status 'NotFound'.
        """
        mobj = re.match(self._VALID_URL, url)
        course_path = mobj.group('coursepath')
        course_id = mobj.group('courseid')

        # course_path comes verbatim from the URL and may contain
        # percent-encoded characters. Escape them so that the later
        # "item_template % video_id" substitutes only the video id.
        item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path.replace('%', '%%')

        course = self._download_json(
            'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
            course_id, 'Downloading course JSON', fatal=False)

        if not course:
            # Webpage fallback: collect the video ids found in the markup
            webpage = self._download_webpage(url, course_id)
            entries = [
                self.url_result(
                    item_template % video_id, ie=LyndaIE.ie_key(),
                    video_id=video_id)
                for video_id in re.findall(
                    r'data-video-id=["\'](\d+)', webpage)]
            return self.playlist_result(
                entries, course_id,
                self._og_search_title(webpage, fatal=False),
                self._og_search_description(webpage))

        if course.get('Status') == 'NotFound':
            raise ExtractorError(
                'Course %s does not exist' % course_id, expected=True)

        unaccessible_videos = 0
        entries = []

        # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
        # by single video API anymore

        for chapter in course['Chapters']:
            chapter_id = chapter.get('ID')
            # "or []" also covers an explicit null 'Videos' value
            for video in chapter.get('Videos') or []:
                if video.get('HasAccess') is False:
                    unaccessible_videos += 1
                    continue
                video_id = video.get('ID')
                if video_id:
                    entries.append({
                        '_type': 'url_transparent',
                        'url': item_template % video_id,
                        'ie_key': LyndaIE.ie_key(),
                        'chapter': chapter.get('Title'),
                        'chapter_number': int_or_none(chapter.get('ChapterIndex')),
                        # Keep a missing id as None instead of the string 'None'
                        'chapter_id': compat_str(chapter_id) if chapter_id is not None else None,
                    })

        if unaccessible_videos > 0:
            self._downloader.report_warning(
                '%s videos are only available for members (or paid members) and will not be downloaded. '
                % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)

        course_title = course.get('Title')
        course_description = course.get('Description')

        return self.playlist_result(entries, course_id, course_title, course_description)
[lynda] minor changes 2014-01-03 13:24:29 +01:00			`from __future__ import unicode_literals`

[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`import re`

			`from .common import InfoExtractor`
[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00			`from ..compat import (`
			`compat_str,`
			`compat_urlparse,`
			`)`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`from ..utils import (`
[lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 00:56:48 +01:00			`ExtractorError,`
			`int_or_none,`
Use urlencode_postdata across the codebase 2016-03-25 21:19:24 +01:00			`urlencode_postdata,`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00			`)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00

[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`class LyndaBaseIE(InfoExtractor):`
[lynda] Fix authentication (closes #18158) 2018-11-29 19:20:27 +01:00			`_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'`
[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00			`_PASSWORD_URL = 'https://www.lynda.com/signin/password'`
			`_USER_URL = 'https://www.lynda.com/signin/user'`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'`
Make sure netrc works for all extractors with login support Fixes #5112 2015-03-03 12:59:17 +01:00			`_NETRC_MACHINE = 'lynda'`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00
			`def _real_initialize(self):`
			`self._login()`

[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00			`@staticmethod`
			`def _check_error(json_string, key_or_keys):`
			`keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys`
			`for key in keys:`
			`error = json_string.get(key)`
			`if error:`
			`raise ExtractorError('Unable to login: %s' % error, expected=True)`

			`def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):`
			`action_url = self._search_regex(`
			`r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,`
			`'post url', default=fallback_action_url, group='url')`

			`if not action_url.startswith('http'):`
			`action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)`

			`form_data = self._hidden_inputs(form_html)`
			`form_data.update(extra_form_data)`

[lynda] Simplify login and improve error capturing (#16891) 2018-07-02 21:47:09 +02:00			`response = self._download_json(`
			`action_url, None, note,`
			`data=urlencode_postdata(form_data),`
			`headers={`
			`'Referer': referrer_url,`
			`'X-Requested-With': 'XMLHttpRequest',`
			`}, expected_status=(418, 500, ))`
[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00
[lynda] Simplify login and improve error capturing (#16891) 2018-07-02 21:47:09 +02:00			`self._check_error(response, ('email', 'password', 'ErrorMessage'))`
[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00
			`return response, action_url`

[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`def _login(self):`
[lynda] Style 2015-11-14 11:44:24 +01:00			`username, password = self._get_login_info()`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`if username is None:`
			`return`

[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00			`# Step 1: download signin page`
			`signin_page = self._download_webpage(`
			`self._SIGNIN_URL, None, 'Downloading signin page')`

[lynda] Skip login if already logged in 2016-06-10 18:01:52 +02:00			`# Already logged in`
			`if any(re.search(p, signin_page) for p in (`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):`
[lynda] Skip login if already logged in 2016-06-10 18:01:52 +02:00			`return`

[lynda] Add support for new authentication (Closes #9740) 2016-06-10 17:40:18 +02:00			`# Step 2: submit email`
			`signin_form = self._search_regex(`
			`r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',`
			`signin_page, 'signin form')`
			`signin_page, signin_url = self._login_step(`
			`signin_form, self._PASSWORD_URL, {'email': username},`
			`'Submitting email', self._SIGNIN_URL)`

			`# Step 3: submit password`
			`password_form = signin_page['body']`
			`self._login_step(`
			`password_form, self._USER_URL, {'email': username, 'password': password},`
			`'Submitting password', signin_url)`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00

			`class LyndaIE(LyndaBaseIE):`
[lynda] minor changes 2014-01-03 13:24:29 +01:00			`IE_NAME = 'lynda'`
			`IE_DESC = 'lynda.com videos'`
[lynda] Relax _VALID_URL (closes #15185) 2018-01-06 17:12:30 +01:00			`_VALID_URL = r'''(?x)`
			`https?://`
			`(?:www\.)?(?:lynda\.com|educourse\.ga)/`
			`(?:`
			`(?:[^/]+/){2,3}(?P<course_id>\d+)|`
			`player/embed`
			`)/`
			`(?P<id>\d+)`
			`'''`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00
			`_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'`

[lynda] Extend _VALID_URL 2015-02-27 15:56:06 +01:00			`_TESTS = [{`
[lynda] Switch to https (closes #10916) 2016-10-15 18:56:09 +02:00			`'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',`
[lynda] Remove md5 from test (Closes #10047) 2016-07-09 16:29:11 +02:00			`# md5 is unstable`
[lynda] Remove useless u"" 2014-01-07 07:14:12 +01:00			`'info_dict': {`
[lynda] Modernize 2014-03-07 16:11:01 +01:00			`'id': '114408',`
			`'ext': 'mp4',`
[lynda] minor changes 2014-01-03 13:24:29 +01:00			`'title': 'Using the exercise files',`
			`'duration': 68`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`}`
[lynda] Extend _VALID_URL 2015-02-27 15:56:06 +01:00			`}, {`
			`'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',`
			`'only_matching': True,`
[lynda] Add support for educourse.ga (closes #14286) 2017-09-21 18:00:35 +02:00			`}, {`
			`'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',`
			`'only_matching': True,`
[lynda] Relax _VALID_URL (closes #15185) 2018-01-06 17:12:30 +01:00			`}, {`
			`'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',`
			`'only_matching': True,`
[lynda] Extend _VALID_URL 2015-02-27 15:56:06 +01:00			`}]`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00
[lynda] Add fallback extraction scenario 2016-10-15 19:07:40 +02:00			`def _raise_unavailable(self, video_id):`
			`self.raise_login_required(`
			`'Video %s is only available for members' % video_id)`

[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`def _real_extract(self, url):`
[lynda] Add fallback extraction scenario 2016-10-15 19:07:40 +02:00			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`course_id = mobj.group('course_id')`

			`query = {`
			`'videoId': video_id,`
			`'type': 'video',`
			`}`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`video = self._download_json(`
[lynda] Add fallback extraction scenario 2016-10-15 19:07:40 +02:00			`'https://www.lynda.com/ajax/player', video_id,`
			`'Downloading video JSON', fatal=False, query=query)`

			`# Fallback scenario`
			`if not video:`
			`query['courseId'] = course_id`

			`play = self._download_json(`
			`'https://www.lynda.com/ajax/course/%s/%s/play'`
			`% (course_id, video_id), video_id, 'Downloading play JSON')`

			`if not play:`
			`self._raise_unavailable(video_id)`

			`formats = []`
			`for formats_dict in play:`
			`urls = formats_dict.get('urls')`
			`if not isinstance(urls, dict):`
			`continue`
			`cdn = formats_dict.get('name')`
			`for format_id, format_url in urls.items():`
			`if not format_url:`
			`continue`
			`formats.append({`
			`'url': format_url,`
			`'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,`
			`'height': int_or_none(format_id),`
			`})`
			`self._sort_formats(formats)`

			`conviva = self._download_json(`
			`'https://www.lynda.com/ajax/player/conviva', video_id,`
			`'Downloading conviva JSON', query=query)`

			`return {`
			`'id': video_id,`
			`'title': conviva['VideoTitle'],`
			`'description': conviva.get('VideoDescription'),`
			`'release_year': int_or_none(conviva.get('ReleaseYear')),`
			`'duration': int_or_none(conviva.get('Duration')),`
			`'creator': conviva.get('Author'),`
			`'formats': formats,`
			`}`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`if 'Status' in video:`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`raise ExtractorError(`
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`'lynda returned error: %s' % video['Message'], expected=True)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`if video.get('HasAccess') is False:`
[lynda] Add fallback extraction scenario 2016-10-15 19:07:40 +02:00			`self._raise_unavailable(video_id)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`video_id = compat_str(video.get('ID') or video_id)`
			`duration = int_or_none(video.get('DurationInSeconds'))`
			`title = video['Title']`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 00:56:48 +01:00			`formats = []`

[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`fmts = video.get('Formats')`
[lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 00:56:48 +01:00			`if fmts:`
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`formats.extend([{`
			`'url': f['Url'],`
			`'ext': f.get('Extension'),`
			`'width': int_or_none(f.get('Width')),`
			`'height': int_or_none(f.get('Height')),`
			`'filesize': int_or_none(f.get('FileSize')),`
			`'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,`
			`} for f in fmts if f.get('Url')])`

			`prioritized_streams = video.get('PrioritizedStreams')`
[lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 00:56:48 +01:00			`if prioritized_streams:`
[lynda] Extract all prioritized streams 2015-10-17 21:36:03 +02:00			`for prioritized_stream_id, prioritized_stream in prioritized_streams.items():`
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`formats.extend([{`
			`'url': video_url,`
[lynda] Fix height for prioritized streams 2016-10-15 19:08:46 +02:00			`'height': int_or_none(format_id),`
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`'format_id': '%s-%s' % (prioritized_stream_id, format_id),`
			`} for format_id, video_url in prioritized_stream.items()])`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Pre-test video URLs for HTTP errors (Closes #2185, closes #4782) 2015-01-25 19:33:42 +01:00			`self._check_formats(formats, video_id)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`self._sort_formats(formats)`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00
[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`subtitles = self.extract_subtitles(video_id)`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`return {`
			`'id': video_id,`
			`'title': title,`
			`'duration': duration,`
[lynda] Add subtitles extraction 2014-01-05 17:59:33 +01:00			`'subtitles': subtitles,`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`'formats': formats`
			`}`
[lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-11 23:26:35 +01:00
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`def _fix_subtitles(self, subs):`
			`srt = ''`
[lynda] Check for the empty subtitle 2015-03-02 10:49:39 +01:00			`seq_counter = 0`
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`for pos in range(0, len(subs) - 1):`
			`seq_current = subs[pos]`
			`m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])`
			`if m_current is None:`
			`continue`
			`seq_next = subs[pos + 1]`
			`m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])`
			`if m_next is None:`
[lynda] Add subtitles extraction 2014-01-05 17:59:33 +01:00			`continue`
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`appear_time = m_current.group('timecode')`
			`disappear_time = m_next.group('timecode')`
[lynda] Check for the empty subtitle 2015-03-02 10:49:39 +01:00			`text = seq_current['Caption'].strip()`
			`if text:`
			`seq_counter += 1`
			`srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)`
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`if srt:`
			`return srt`

[lynda] Modernize and make more robust 2015-11-06 18:24:39 +01:00			`def _get_subtitles(self, video_id):`
[lynda] Switch to https (closes #10916) 2016-10-15 18:56:09 +02:00			`url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id`
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`subs = self._download_json(url, None, False)`
[lynda] Skip invalid subtitles (closes #15159) 2018-01-03 10:41:28 +01:00			`fixed_subs = self._fix_subtitles(subs)`
			`if fixed_subs:`
			`return {'en': [{'ext': 'srt', 'data': fixed_subs}]}`
[lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00			`else:`
			`return {}`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00

[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`class LyndaCourseIE(LyndaBaseIE):`
[lynda] minor changes 2014-01-03 13:24:29 +01:00			`IE_NAME = 'lynda:course'`
			`IE_DESC = 'lynda.com online courses'`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
			`# Course link equals to welcome/introduction video link of same course`
			`# We will recognize it as course link`
[lynda] Relax _VALID_URL (closes #15185) 2018-01-06 17:12:30 +01:00			`_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'`

			`_TESTS = [{`
			`'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',`
			`'only_matching': True,`
			`}]`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`course_path = mobj.group('coursepath')`
			`course_id = mobj.group('courseid')`
PEP8 applied 2014-11-23 20:41:03 +01:00
[lynda:course] Add webpage extraction fallback (closes #12238) 2017-02-23 23:01:31 +01:00			`item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path`

[lynda:course] Modernize and make more robust 2015-11-06 18:10:07 +01:00			`course = self._download_json(`
[lynda] Switch to https (closes #10916) 2016-10-15 18:56:09 +02:00			`'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,`
[lynda:course] Add webpage extraction fallback (closes #12238) 2017-02-23 23:01:31 +01:00			`course_id, 'Downloading course JSON', fatal=False)`

			`if not course:`
			`webpage = self._download_webpage(url, course_id)`
			`entries = [`
			`self.url_result(`
			`item_template % video_id, ie=LyndaIE.ie_key(),`
			`video_id=video_id)`
			`for video_id in re.findall(`
			`r'data-video-id=["\'](\d+)', webpage)]`
			`return self.playlist_result(`
			`entries, course_id,`
			`self._og_search_title(webpage, fatal=False),`
			`self._og_search_description(webpage))`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda:course] Modernize and make more robust 2015-11-06 18:10:07 +01:00			`if course.get('Status') == 'NotFound':`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`raise ExtractorError(`
			`'Course %s does not exist' % course_id, expected=True)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
			`unaccessible_videos = 0`
[lynda] Extract chapter metadata (#8993) 2016-03-26 21:00:36 +01:00			`entries = []`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 00:56:48 +01:00			`# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided`
			`# by single video API anymore`

[lynda:course] Modernize and make more robust 2015-11-06 18:10:07 +01:00			`for chapter in course['Chapters']:`
			`for video in chapter.get('Videos', []):`
			`if video.get('HasAccess') is False:`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00			`unaccessible_videos += 1`
			`continue`
[lynda] Extract chapter metadata (#8993) 2016-03-26 21:00:36 +01:00			`video_id = video.get('ID')`
			`if video_id:`
			`entries.append({`
			`'_type': 'url_transparent',`
[lynda:course] Add webpage extraction fallback (closes #12238) 2017-02-23 23:01:31 +01:00			`'url': item_template % video_id,`
[lynda] Extract chapter metadata (#8993) 2016-03-26 21:00:36 +01:00			`'ie_key': LyndaIE.ie_key(),`
			`'chapter': chapter.get('Title'),`
			`'chapter_number': int_or_none(chapter.get('ChapterIndex')),`
			`'chapter_id': compat_str(chapter.get('ID')),`
			`})`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
			`if unaccessible_videos > 0:`
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 17:12:10 +01:00			`self._downloader.report_warning(`
			`'%s videos are only available for members (or paid members) and will not be downloaded. '`
			`% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[lynda:course] Modernize and make more robust 2015-11-06 18:10:07 +01:00			`course_title = course.get('Title')`
[Lynda] Extract course description 2016-06-10 19:17:58 +02:00			`course_description = course.get('Description')`
[lynda] Add support for lynda.com (#1966) 2013-12-26 09:48:24 +01:00
[Lynda] Extract course description 2016-06-10 19:17:58 +02:00			`return self.playlist_result(entries, course_id, course_title, course_description)`