mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-30 04:08:01 +01:00
This commit is contained in:
parent
2dbc0967f2
commit
04a93ac37a
@ -28,6 +28,7 @@ from youtube_dl.extractor import (
|
|||||||
RTVEALaCartaIE,
|
RTVEALaCartaIE,
|
||||||
FunnyOrDieIE,
|
FunnyOrDieIE,
|
||||||
DemocracynowIE,
|
DemocracynowIE,
|
||||||
|
LinkedInLearningIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -219,6 +220,18 @@ class TestLyndaSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||||
|
|
||||||
|
|
||||||
|
class TestLinkedInSubtitles(BaseTestSubtitles):
    """Subtitle extraction check for a LinkedIn Learning lesson."""

    url = 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true'
    IE = LinkedInLearningIE

    def test_allsubtitles(self):
        """Request every subtitle track and verify the single English one."""
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True

        subtitles = self.getSubtitles()

        # Only an English track is expected for this lesson.
        self.assertEqual(set(subtitles), set(['en']))
        # The checksum is pinned to the subtitle data produced for this
        # lesson; regenerate it if the transcript or its conversion changes.
        english_digest = md5(subtitles['en'])
        self.assertEqual(english_digest, 'b329730e94e7fbdbac0307b3cad1221a')
|
||||||
|
|
||||||
|
|
||||||
class TestNPOSubtitles(BaseTestSubtitles):
|
class TestNPOSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||||
IE = NPOIE
|
IE = NPOIE
|
||||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
srt_subtitles_timecode,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
@ -31,10 +32,16 @@ class LinkedInLearningBaseIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
sub = ' %dp' % resolution
|
sub = ' %dp' % resolution
|
||||||
api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
|
api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
|
||||||
|
cookies = self._get_cookies(api_url)
|
||||||
|
|
||||||
|
headers = {}
|
||||||
|
if 'JSESSIONID' in cookies:
|
||||||
|
headers['Csrf-Token'] = cookies['JSESSIONID'].value
|
||||||
|
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
|
api_url, video_slug, 'Downloading%s JSON metadata' % sub,
|
||||||
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
headers=headers,
|
||||||
}, query=query)['elements'][0]
|
query=query)['elements'][0]
|
||||||
|
|
||||||
def _get_urn_id(self, video_data):
|
def _get_urn_id(self, video_data):
|
||||||
urn = video_data.get('urn')
|
urn = video_data.get('urn')
|
||||||
@ -47,12 +54,14 @@ class LinkedInLearningBaseIE(InfoExtractor):
|
|||||||
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
# We need the JSESSIONID from the login page, even if we're not logging in
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
|
|
||||||
email, password = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
if email is None:
|
if email is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_page = self._download_webpage(
|
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
|
||||||
action_url = urljoin(self._LOGIN_URL, self._search_regex(
|
action_url = urljoin(self._LOGIN_URL, self._search_regex(
|
||||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
|
||||||
default='https://www.linkedin.com/uas/login-submit', group='url'))
|
default='https://www.linkedin.com/uas/login-submit', group='url'))
|
||||||
@ -126,6 +135,8 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||||||
|
|
||||||
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
||||||
|
|
||||||
|
subtitles = self.extract_subtitles(video_data)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': self._get_video_id(video_data, course_slug, video_slug),
|
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -133,8 +144,40 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||||||
'thumbnail': video_data.get('defaultThumbnail'),
|
'thumbnail': video_data.get('defaultThumbnail'),
|
||||||
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
||||||
'duration': int_or_none(video_data.get('durationInSeconds')),
|
'duration': int_or_none(video_data.get('durationInSeconds')),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_subtitles(self, video_data):
|
||||||
|
transcript = video_data.get('transcript')
|
||||||
|
if not transcript:
|
||||||
|
return {}
|
||||||
|
lines = transcript.get('lines')
|
||||||
|
if not lines:
|
||||||
|
return {}
|
||||||
|
fixed_subs = self._fix_subtitles(lines)
|
||||||
|
if fixed_subs:
|
||||||
|
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _fix_subtitles(self, lines):
|
||||||
|
srt = ''
|
||||||
|
seq_counter = 0
|
||||||
|
for pos in range(0, len(lines) - 1):
|
||||||
|
seq_current = lines[pos]
|
||||||
|
seq_next = lines[pos + 1]
|
||||||
|
|
||||||
|
appear_time = self._timecode(seq_current['transcriptStartAt'])
|
||||||
|
disappear_time = self._timecode(seq_next['transcriptStartAt'])
|
||||||
|
text = seq_current['caption'].strip()
|
||||||
|
|
||||||
|
if text:
|
||||||
|
seq_counter += 1
|
||||||
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
|
||||||
|
return srt
|
||||||
|
|
||||||
|
def _timecode(self, ms):
    """Convert a millisecond offset to seconds and format it with srt_subtitles_timecode."""
    seconds = ms / 1000.0
    return srt_subtitles_timecode(seconds)
|
||||||
|
|
||||||
|
|
||||||
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
||||||
IE_NAME = 'linkedin:learning:course'
|
IE_NAME = 'linkedin:learning:course'
|
||||||
|
Loading…
Reference in New Issue
Block a user