From 7d30dd3cee5ebb4d689a149ef7f57d1c2092dace Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Wed, 21 Jun 2017 01:45:13 -0400 Subject: [PATCH 01/12] [Panopto] Add Panopto extractors No test cases are included as I am not aware of any publicly available Panopto recordings that this extractor will work with. Supports downloading individual recordings or entire folders recursively. Folders are separated with a ' -- ' in the playlist title. Cookies are likely required to use this extractor, specifically their .ASPXAUTH cookie, which can be obtained from your browser after logging in. --write-all-thumbnails can be used to download PowerPoint slides if they are not included as a video stream. Suggested output format is 'out/%(playlist)s/%(title)s.%(ext)s' --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/panopto.py | 200 +++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 youtube_dl/extractor/panopto.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e97691daa..a0ac7cff1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -756,6 +756,10 @@ from .packtpub import ( ) from .pandatv import PandaTVIE from .pandoratv import PandoraTVIE +from .panopto import ( + PanoptoIE, + PanoptoFolderIE, +) from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py new file mode 100644 index 000000000..56d65805f --- /dev/null +++ b/youtube_dl/extractor/panopto.py @@ -0,0 +1,200 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import compat_str + +from ..utils import ( + ExtractorError, + smuggle_url, + unsmuggle_url, +) + +import re +from random import random +import json + + +class PanoptoBaseIE(InfoExtractor): + + @classmethod + def _match_organization(cls, url): + if
'_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('org')) + + +class PanoptoIE(PanoptoBaseIE): + + _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Viewer\.aspx\?id=(?P[a-f0-9-]+)' + + def _get_contribs_str(self, contribs): + s = '' + for c in contribs: + s += '%s, ' % c['DisplayName'] + return s[:-2] if len(contribs) else '' + + def _real_extract(self, url): + video_id = self._match_id(url) + org = self._match_organization(url) + + delivery_info = self._download_json( + 'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx' % org, + video_id, + query={ + 'deliveryId': video_id, + 'invocationId': '', + 'isLiveNotes': 'false', + 'refreshAuthCookie': 'true', + 'isActiveBroadcast': 'false', + 'isEditing': 'false', + 'isKollectiveAgentInstalled': 'false', + 'isEmbed': 'false', + 'responseType': 'json', + } + ) + + if 'ErrorCode' in delivery_info: + self._downloader.report_warning("If the video you are trying to download requires you to sign-in, you will " + "need to provide a cookies file that allows the downloader to authenticate " + "with Panopto. 
If the error below is about unauthorized access, this is " + "most likely the issue.") + raise ExtractorError( + 'API error: (%s) %s' % + (delivery_info['ErrorCode'], delivery_info['ErrorMessage'] if 'ErrorMessage' in delivery_info else '') + ) + + streams = [] + for this_stream in delivery_info['Delivery']['Streams']: + new_stream = { + 'id': this_stream['PublicID'], + 'title': this_stream['Tag'], + 'formats': [], + } + if 'StreamUrl' in this_stream: + new_stream['formats'].append({ + 'url': this_stream['StreamUrl'], + }) + if 'StreamHttpUrl' in this_stream: + new_stream['formats'].append({ + 'url': this_stream['StreamHttpUrl'], + }) + if len(new_stream['formats']): + streams.append(new_stream) + + if not streams: + raise ExtractorError('No streams found.') + + result = { + 'id': video_id, + 'title': delivery_info['Delivery']['SessionName'], + 'thumbnail': 'https://%s.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId=%s&mode=Delivery&random=%s' % + (org, video_id, random()), + } + + if len(streams) == 1: + result['formats'] = streams[0]['formats'] + else: + result['_type'] = 'multi_video' + result['entries'] = streams + + if 'Contributors' in delivery_info['Delivery']: + result['uploader'] = self._get_contribs_str(delivery_info['Delivery']['Contributors']) + + if 'SessionStartTime' in delivery_info['Delivery']: + result['timestamp'] = delivery_info['Delivery']['SessionStartTime'] - 11640000000 + + if 'Duration' in delivery_info['Delivery']: + result['duration'] = delivery_info['Delivery']['Duration'] + + thumbnails = [] + if 'Timestamps' in delivery_info['Delivery']: + for timestamp in delivery_info['Delivery']['Timestamps']: + thumbnails.append({ + # 'url': 'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID=%s&sessionPID=%s&number=%s&isPrimary=false&absoluteTime=%s' % + # (org, timestamp['ObjectPublicIdentifier'], timestamp['SessionID'], timestamp['ObjectSequenceNumber'], timestamp['AbsoluteTime']), + 'url': 
'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id=%s&number=%s&x=undefined' % + (org, timestamp['ObjectIdentifier'], timestamp['ObjectSequenceNumber']) + }) + + if len(thumbnails): + if result.get('entries') is not None: + result['entries'][1]['thumbnails'] = thumbnails + else: + result['thumbnails'] = thumbnails + + return result + + +class PanoptoFolderIE(PanoptoBaseIE): + _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Sessions\/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' + + def _real_extract(self, url): + url, smuggled = unsmuggle_url(url) + if smuggled is None: + smuggled = {} + folder_id = self._match_id(url) + org = self._match_organization(url) + + folder_data = self._download_json( + 'https://%s.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions' % org, + folder_id, + 'Downloading folder listing', + 'Failed to download folder listing', + data=json.dumps({ + 'queryParameters': { + 'query': None, + 'sortColumn': 1, + 'sortAscending': False, + 'maxResults': 10000, + 'page': 0, + 'startDate': None, + 'endDate': None, + 'folderID': folder_id, + 'bookmarked': False, + 'getFolderData': True, + 'isSharedWithMe': False, + }, + }, ensure_ascii=False).encode('utf-8'), + headers={'Content-Type': 'application/json'})['d'] + + entries = [] + if 'Results' in folder_data and folder_data['Results'] is not None: + for video in folder_data['Results']: + new_video = { + 'id': video['DeliveryID'], + 'title': video['SessionName'], + 'url': video['ViewerUrl'], + '_type': 'url_transparent', + 'ie_key': 'Panopto', + } + if 'prev_folders' in smuggled: + new_video['title'] = smuggled['prev_folders'] + ' -- ' + new_video['title'] + entries.append(new_video) + + if 'Subfolders' in folder_data and folder_data['Subfolders'] is not None: + for subfolder in folder_data['Subfolders']: + new_folder = { + 'id': subfolder['ID'], + 'title': subfolder['Name'], + '_type': 'url_transparent', + 'ie_key': 'PanoptoFolder', + } + if 
'prev_folders' in smuggled: + new_folder['title'] = smuggled['prev_folders'] + ' -- ' + new_folder['title'] + new_folder['url'] = smuggle_url('https://%s.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="%s"' % + (org, subfolder['ID']), {'prev_folders': new_folder['title']}) + entries.append(new_folder) + + if not entries: + raise ExtractorError('Folder is empty or authentication failed') + + return { + 'id': folder_id, + 'title': folder_data['Results'][0]['FolderName'] if len(folder_data['Results']) else folder_data['Subfolders'][0]['ParentFolderName'], + '_type': 'playlist', + 'entries': entries, + } From cbb1753395979dbde1f7488f1bec76de0fc6b6d3 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 20:11:49 -0400 Subject: [PATCH 02/12] [Panopto] Document classes wrt PEP 257 --- youtube_dl/extractor/panopto.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 56d65805f..0165f292c 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -17,6 +17,7 @@ import json class PanoptoBaseIE(InfoExtractor): + """The base class with common methods for Panopto extractors.""" @classmethod def _match_organization(cls, url): @@ -28,6 +29,7 @@ class PanoptoBaseIE(InfoExtractor): class PanoptoIE(PanoptoBaseIE): + """Extracts a single Panopto video including all available streams.""" _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Viewer\.aspx\?id=(?P[a-f0-9-]+)' @@ -130,6 +132,8 @@ class PanoptoIE(PanoptoBaseIE): class PanoptoFolderIE(PanoptoBaseIE): + """Recursively extracts a folder of Panopto videos, digging as far as possible into subfolders.""" + _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Sessions\/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' def _real_extract(self, url): From bd7d8149d7a7f68c0e021267b132fcfa1cfecd68 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 
20:16:33 -0400 Subject: [PATCH 03/12] [Panopto] Preferring static over instance methods --- youtube_dl/extractor/panopto.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 0165f292c..2b283dafb 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -33,7 +33,8 @@ class PanoptoIE(PanoptoBaseIE): _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Viewer\.aspx\?id=(?P[a-f0-9-]+)' - def _get_contribs_str(self, contribs): + @staticmethod + def _get_contribs_str(contribs): s = '' for c in contribs: s += '%s, ' % c['DisplayName'] From 0807e39d85ce0bbfc5d8fb665897267efc9a408b Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 20:39:39 -0400 Subject: [PATCH 04/12] [Panopto] Replace % with format() wrt PEP 3101 --- youtube_dl/extractor/panopto.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 2b283dafb..21b8bfdb6 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -37,7 +37,7 @@ class PanoptoIE(PanoptoBaseIE): def _get_contribs_str(contribs): s = '' for c in contribs: - s += '%s, ' % c['DisplayName'] + s += '{}, ' .format(c['DisplayName']) return s[:-2] if len(contribs) else '' def _real_extract(self, url): @@ -45,7 +45,7 @@ class PanoptoIE(PanoptoBaseIE): org = self._match_organization(url) delivery_info = self._download_json( - 'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx' % org, + 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx'.format(org), video_id, query={ 'deliveryId': video_id, @@ -66,8 +66,8 @@ class PanoptoIE(PanoptoBaseIE): "with Panopto. 
If the error below is about unauthorized access, this is " "most likely the issue.") raise ExtractorError( - 'API error: (%s) %s' % - (delivery_info['ErrorCode'], delivery_info['ErrorMessage'] if 'ErrorMessage' in delivery_info else '') + 'API error: ({}) {}'.format(delivery_info['ErrorCode'], + delivery_info['ErrorMessage'] if 'ErrorMessage' in delivery_info else '') ) streams = [] @@ -94,8 +94,8 @@ class PanoptoIE(PanoptoBaseIE): result = { 'id': video_id, 'title': delivery_info['Delivery']['SessionName'], - 'thumbnail': 'https://%s.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId=%s&mode=Delivery&random=%s' % - (org, video_id, random()), + 'thumbnail': 'https://{}.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId={}&mode=Delivery&random={}'.format( + org, video_id, random()), } if len(streams) == 1: @@ -117,10 +117,10 @@ class PanoptoIE(PanoptoBaseIE): if 'Timestamps' in delivery_info['Delivery']: for timestamp in delivery_info['Delivery']['Timestamps']: thumbnails.append({ - # 'url': 'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID=%s&sessionPID=%s&number=%s&isPrimary=false&absoluteTime=%s' % - # (org, timestamp['ObjectPublicIdentifier'], timestamp['SessionID'], timestamp['ObjectSequenceNumber'], timestamp['AbsoluteTime']), - 'url': 'https://%s.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id=%s&number=%s&x=undefined' % - (org, timestamp['ObjectIdentifier'], timestamp['ObjectSequenceNumber']) + # 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={}&sessionPID={}&number={}&isPrimary=false&absoluteTime={}'.format( + # org, timestamp['ObjectPublicIdentifier'], timestamp['SessionID'], timestamp['ObjectSequenceNumber'], timestamp['AbsoluteTime']), + 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={}&number={}&x=undefined'.format( + org, timestamp['ObjectIdentifier'], timestamp['ObjectSequenceNumber']) }) if 
len(thumbnails): @@ -145,7 +145,7 @@ class PanoptoFolderIE(PanoptoBaseIE): org = self._match_organization(url) folder_data = self._download_json( - 'https://%s.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions' % org, + 'https://{}.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions'.format(org), folder_id, 'Downloading folder listing', 'Failed to download folder listing', @@ -190,8 +190,8 @@ class PanoptoFolderIE(PanoptoBaseIE): } if 'prev_folders' in smuggled: new_folder['title'] = smuggled['prev_folders'] + ' -- ' + new_folder['title'] - new_folder['url'] = smuggle_url('https://%s.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="%s"' % - (org, subfolder['ID']), {'prev_folders': new_folder['title']}) + new_folder['url'] = smuggle_url('https://{}.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="{}"' + .format(org, subfolder['ID']), {'prev_folders': new_folder['title']}) entries.append(new_folder) if not entries: From a29e634cfeb820c9dfdb79f069383f6daec14173 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 20:47:41 -0400 Subject: [PATCH 05/12] [Panopto] Document methods wrt PEP 257 --- youtube_dl/extractor/panopto.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 21b8bfdb6..d2a4c6c88 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -21,6 +21,7 @@ class PanoptoBaseIE(InfoExtractor): @classmethod def _match_organization(cls, url): + """Match and return the organization part of a Panopto hosted URL.""" if '_VALID_URL_RE' not in cls.__dict__: cls._VALID_URL_RE = re.compile(cls._VALID_URL) m = cls._VALID_URL_RE.match(url) @@ -35,12 +36,14 @@ class PanoptoIE(PanoptoBaseIE): @staticmethod def _get_contribs_str(contribs): + """Returns a comma-delimited string of contributors.""" s = '' for c in contribs: s += '{}, ' .format(c['DisplayName']) return s[:-2] if len(contribs) else '' def _real_extract(self, 
url): + """Extracts the video and stream information for the given Panopto hosted URL.""" video_id = self._match_id(url) org = self._match_organization(url) @@ -138,6 +141,7 @@ class PanoptoFolderIE(PanoptoBaseIE): _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Sessions\/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' def _real_extract(self, url): + """Recursively extracts the video and stream information for the given Panopto hosted URL.""" url, smuggled = unsmuggle_url(url) if smuggled is None: smuggled = {} From fed0a6a9392e3436132314121ab7e07e778459b0 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 21:32:17 -0400 Subject: [PATCH 06/12] [Panopto] Bring the code in line with conventions --- youtube_dl/extractor/panopto.py | 46 ++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index d2a4c6c88..8fbdd3987 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -39,7 +39,9 @@ class PanoptoIE(PanoptoBaseIE): """Returns a comma-delimited string of contributors.""" s = '' for c in contribs: - s += '{}, ' .format(c['DisplayName']) + display_name = c.get('DisplayName') + if display_name is not None: + s += '{}, '.format(display_name) return s[:-2] if len(contribs) else '' def _real_extract(self, url): @@ -69,8 +71,7 @@ class PanoptoIE(PanoptoBaseIE): "with Panopto. 
If the error below is about unauthorized access, this is " "most likely the issue.") raise ExtractorError( - 'API error: ({}) {}'.format(delivery_info['ErrorCode'], - delivery_info['ErrorMessage'] if 'ErrorMessage' in delivery_info else '') + 'API error: ({}) {}'.format(delivery_info.get('ErrorCode', '?'), delivery_info.get('ErrorMessage', '?')) ) streams = [] @@ -107,24 +108,39 @@ class PanoptoIE(PanoptoBaseIE): result['_type'] = 'multi_video' result['entries'] = streams - if 'Contributors' in delivery_info['Delivery']: - result['uploader'] = self._get_contribs_str(delivery_info['Delivery']['Contributors']) + # We already know Delivery exists since we need it for stream extraction + contributors = delivery_info['Delivery'].get('Contributors') + if contributors is not None: + result['uploader'] = self._get_contribs_str(contributors) - if 'SessionStartTime' in delivery_info['Delivery']: - result['timestamp'] = delivery_info['Delivery']['SessionStartTime'] - 11640000000 + session_start_time = delivery_info['Delivery'].get('SessionStartTime') + if session_start_time is not None: + result['timestamp'] = session_start_time - 11640000000 - if 'Duration' in delivery_info['Delivery']: - result['duration'] = delivery_info['Delivery']['Duration'] + duration = delivery_info['Delivery'].get('Duration') + if duration is not None: + result['duration'] = duration thumbnails = [] if 'Timestamps' in delivery_info['Delivery']: for timestamp in delivery_info['Delivery']['Timestamps']: - thumbnails.append({ - # 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={}&sessionPID={}&number={}&isPrimary=false&absoluteTime={}'.format( - # org, timestamp['ObjectPublicIdentifier'], timestamp['SessionID'], timestamp['ObjectSequenceNumber'], timestamp['AbsoluteTime']), - 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={}&number={}&x=undefined'.format( - org, timestamp['ObjectIdentifier'], timestamp['ObjectSequenceNumber']) - }) + 
object_id = timestamp.get('ObjectIdentifier') + object_sequence_num = timestamp.get('ObjectSequenceNumber') + if object_id is not None and object_sequence_num is not None: + thumbnails.append({ + 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={}&number={}&x=undefined'.format( + org, object_id, object_sequence_num) + }) + + # This provides actual thumbnails instead of the above which allows for downloading of real slides + # object_public_id = timestamp.get('ObjectPublicIdentifier') + # session_id = timestamp.get('SessionID') + # absolute_time = timestamp.get('AbsoluteTime') + # if object_public_id is not None and session_id is not None and object_sequence_num is not None and absolute_time is not None: + # thumbnails.append({ + # 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={}&sessionPID={}&number={}&isPrimary=false&absoluteTime={}'.format( + # org, object_public_id, session_id, object_sequence_num, absolute_time), + # }) if len(thumbnails): if result.get('entries') is not None: From dec449e22f7212baaad04ae759024ca818ba1bd2 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 22:03:05 -0400 Subject: [PATCH 07/12] [Panopto] Prefer the highest quality stream --- youtube_dl/extractor/panopto.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 8fbdd3987..716987622 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -81,14 +81,14 @@ class PanoptoIE(PanoptoBaseIE): 'title': this_stream['Tag'], 'formats': [], } - if 'StreamUrl' in this_stream: - new_stream['formats'].append({ - 'url': this_stream['StreamUrl'], - }) if 'StreamHttpUrl' in this_stream: new_stream['formats'].append({ 'url': this_stream['StreamHttpUrl'], }) + if 'StreamUrl' in this_stream: + m3u8_formats = self._extract_m3u8_formats(this_stream['StreamUrl'], video_id, 'mp4') + 
self._sort_formats(m3u8_formats) + new_stream['formats'].extend(m3u8_formats) if len(new_stream['formats']): streams.append(new_stream) From b313bc69f85a1a9e26f8e3c63e7704e280504aeb Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Thu, 6 Jul 2017 23:15:23 -0400 Subject: [PATCH 08/12] [Panopto] Add tests Testing may be impossible for the Folder extractor, or I'm just doing it wrong. With the current test we enter a catch-22 where it claims we need an 'ext' entry to continue testing but upon adding that it claims it expected None. --- youtube_dl/extractor/panopto.py | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 716987622..f7124bfa1 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -33,6 +33,48 @@ class PanoptoIE(PanoptoBaseIE): """Extracts a single Panopto video including all available streams.""" _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Viewer\.aspx\?id=(?P[a-f0-9-]+)' + _TESTS = [ + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', + 'md5': '06fb292a3510aa5bc5f0c950fe58c9f7', + 'info_dict': { + 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', + 'ext': 'mp4', + 'title': 'Panopto for Business', + 'uploader': 'Ari Bixhorn', + 'upload_date': '20160328', + 'timestamp': 1459184200.3759995, + }, + }, + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', + 'info_dict': { + 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', + 'title': 'Overcoming Top 4 Challenges of Enterprise Video', + 'uploader': 'Panopto Support', + 'timestamp': 1449409251.8579998, + }, + 'playlist': [ + { + 'md5': 'e22b5a284789ba2681e4fe215352d816', + 'info_dict': { + 'id': '15ad06ef-3f7d-4074-aa4a-87c41dd18f9c', + 'ext': 'mp4', + 'title': 'OBJECT', + }, + }, + { + 'md5': '4396cbff07e7b883ca522a6783dc6a70', + 
'info_dict': { + 'id': '7668d6b2-dc81-421d-9853-20653689e2e8', + 'ext': 'mp4', + 'title': 'DV', + }, + }, + ], + 'playlist_count': 2, + }, + ] @staticmethod def _get_contribs_str(contribs): @@ -155,6 +197,35 @@ class PanoptoFolderIE(PanoptoBaseIE): """Recursively extracts a folder of Panopto videos, digging as far as possible into subfolders.""" _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Sessions\/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' + _TESTS = [ + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%222a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f%22', + 'info_dict': { + 'id': '2a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f', + 'title': 'End-to-End Demo', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '70f7441d-01b5-4319-b399-6591e456b935', + # Fails before download with this line (it claims it needs an ext field) + # but fails after download when it's included because 'ext' should be None + 'ext': 'a', + 'title': 'b', + }, + 'playlist': [ + { + 'info_dict': { + 'id': 'c', + 'ext': 'd', + 'title': 'e', + } + } + ], + }, + ], + }, + ] def _real_extract(self, url): """Recursively extracts the video and stream information for the given Panopto hosted URL.""" From 7063d5312930e8e6e3094d3f3e6f9f21586f1ef0 Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Sat, 8 Jul 2017 15:05:45 -0400 Subject: [PATCH 09/12] [Panopto] No need to escape / --- youtube_dl/extractor/panopto.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index f7124bfa1..2c792e832 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -32,7 +32,7 @@ class PanoptoBaseIE(InfoExtractor): class PanoptoIE(PanoptoBaseIE): """Extracts a single Panopto video including all available streams.""" - _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Viewer\.aspx\?id=(?P[a-f0-9-]+)' + _VALID_URL = 
r'^https?://(?P[a-z0-9]+)\.hosted\.panopto.com/Panopto/Pages/Viewer\.aspx\?id=(?P[a-f0-9-]+)' _TESTS = [ { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', @@ -196,7 +196,7 @@ class PanoptoIE(PanoptoBaseIE): class PanoptoFolderIE(PanoptoBaseIE): """Recursively extracts a folder of Panopto videos, digging as far as possible into subfolders.""" - _VALID_URL = r'^https?:\/\/(?P[a-z0-9]+)\.hosted\.panopto.com\/Panopto\/Pages\/Sessions\/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' + _VALID_URL = r'^https?://(?P[a-z0-9]+)\.hosted\.panopto.com/Panopto/Pages/Sessions/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' _TESTS = [ { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%222a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f%22', From d592aedc0b9a65e6b9e0511acb918fbdd7b56f5c Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Sat, 8 Jul 2017 15:10:20 -0400 Subject: [PATCH 10/12] [Panopto] {} does not work in Python 2.6 https://stackoverflow.com/a/19668429/238374 --- youtube_dl/extractor/panopto.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 2c792e832..991f355a2 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -83,7 +83,7 @@ class PanoptoIE(PanoptoBaseIE): for c in contribs: display_name = c.get('DisplayName') if display_name is not None: - s += '{}, '.format(display_name) + s += '{0}, '.format(display_name) return s[:-2] if len(contribs) else '' def _real_extract(self, url): @@ -92,7 +92,7 @@ class PanoptoIE(PanoptoBaseIE): org = self._match_organization(url) delivery_info = self._download_json( - 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx'.format(org), + 'https://{0}.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx'.format(org), video_id, query={ 'deliveryId': video_id, @@ -113,7 +113,7 @@ class 
PanoptoIE(PanoptoBaseIE): "with Panopto. If the error below is about unauthorized access, this is " "most likely the issue.") raise ExtractorError( - 'API error: ({}) {}'.format(delivery_info.get('ErrorCode', '?'), delivery_info.get('ErrorMessage', '?')) + 'API error: ({0}) {1}'.format(delivery_info.get('ErrorCode', '?'), delivery_info.get('ErrorMessage', '?')) ) streams = [] @@ -140,7 +140,7 @@ class PanoptoIE(PanoptoBaseIE): result = { 'id': video_id, 'title': delivery_info['Delivery']['SessionName'], - 'thumbnail': 'https://{}.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId={}&mode=Delivery&random={}'.format( + 'thumbnail': 'https://{0}.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId={1}&mode=Delivery&random={2}'.format( org, video_id, random()), } @@ -170,7 +170,7 @@ class PanoptoIE(PanoptoBaseIE): object_sequence_num = timestamp.get('ObjectSequenceNumber') if object_id is not None and object_sequence_num is not None: thumbnails.append({ - 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={}&number={}&x=undefined'.format( + 'url': 'https://{0}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={1}&number={2}&x=undefined'.format( org, object_id, object_sequence_num) }) @@ -180,7 +180,7 @@ class PanoptoIE(PanoptoBaseIE): # absolute_time = timestamp.get('AbsoluteTime') # if object_public_id is not None and session_id is not None and object_sequence_num is not None and absolute_time is not None: # thumbnails.append({ - # 'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={}&sessionPID={}&number={}&isPrimary=false&absoluteTime={}'.format( + # 'url': 'https://{0}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={1}&sessionPID={2}&number={3}&isPrimary=false&absoluteTime={4}'.format( # org, object_public_id, session_id, object_sequence_num, absolute_time), # }) @@ -236,7 +236,7 @@ class PanoptoFolderIE(PanoptoBaseIE): org = 
self._match_organization(url) folder_data = self._download_json( - 'https://{}.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions'.format(org), + 'https://{0}.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions'.format(org), folder_id, 'Downloading folder listing', 'Failed to download folder listing', @@ -281,7 +281,7 @@ class PanoptoFolderIE(PanoptoBaseIE): } if 'prev_folders' in smuggled: new_folder['title'] = smuggled['prev_folders'] + ' -- ' + new_folder['title'] - new_folder['url'] = smuggle_url('https://{}.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="{}"' + new_folder['url'] = smuggle_url('https://{0}.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="{1}"' .format(org, subfolder['ID']), {'prev_folders': new_folder['title']}) entries.append(new_folder) From 56a8efd5de26bac1390fd8c45c56adc38c6f889b Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Wed, 20 Dec 2017 06:09:37 -0500 Subject: [PATCH 11/12] [Panopto] Fix all tests --- youtube_dl/extractor/panopto.py | 35 ++++++++------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 991f355a2..048be4cfa 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -36,11 +36,11 @@ class PanoptoIE(PanoptoBaseIE): _TESTS = [ { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', - 'md5': '06fb292a3510aa5bc5f0c950fe58c9f7', + 'md5': 'e8e6ef6b0572dd5985f5f8c3e096f717', 'info_dict': { 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', 'ext': 'mp4', - 'title': 'Panopto for Business', + 'title': 'Panopto for Business - Use Cases', 'uploader': 'Ari Bixhorn', 'upload_date': '20160328', 'timestamp': 1459184200.3759995, @@ -64,7 +64,7 @@ class PanoptoIE(PanoptoBaseIE): }, }, { - 'md5': '4396cbff07e7b883ca522a6783dc6a70', + 'md5': 'a483b8116abbb04a7112a9a3ccc835ce', 'info_dict': { 'id': 
'7668d6b2-dc81-421d-9853-20653689e2e8', 'ext': 'mp4', @@ -199,32 +199,13 @@ class PanoptoFolderIE(PanoptoBaseIE): _VALID_URL = r'^https?://(?P[a-z0-9]+)\.hosted\.panopto.com/Panopto/Pages/Sessions/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' _TESTS = [ { - 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%222a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f%22', + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%224540f269-8bb1-4352-b5dc-64e5919d1c40%22', 'info_dict': { - 'id': '2a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f', - 'title': 'End-to-End Demo', + 'id': '4540f269-8bb1-4352-b5dc-64e5919d1c40', + 'title': 'Demo', }, - 'playlist': [ - { - 'info_dict': { - 'id': '70f7441d-01b5-4319-b399-6591e456b935', - # Fails before download with this line (it claims it needs an ext field) - # but fails after download when it's included because 'ext' should be None - 'ext': 'a', - 'title': 'b', - }, - 'playlist': [ - { - 'info_dict': { - 'id': 'c', - 'ext': 'd', - 'title': 'e', - } - } - ], - }, - ], - }, + 'playlist_count': 4, + } ] def _real_extract(self, url): From 935b19570408e47163e21557b3e2acf20c88bcaf Mon Sep 17 00:00:00 2001 From: Kevin Mark Date: Wed, 20 Dec 2017 06:24:05 -0500 Subject: [PATCH 12/12] [Panopto] More resilient folder regex --- youtube_dl/extractor/panopto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/panopto.py b/youtube_dl/extractor/panopto.py index 048be4cfa..f239e7a1d 100644 --- a/youtube_dl/extractor/panopto.py +++ b/youtube_dl/extractor/panopto.py @@ -196,7 +196,7 @@ class PanoptoIE(PanoptoBaseIE): class PanoptoFolderIE(PanoptoBaseIE): """Recursively extracts a folder of Panopto videos, digging as far as possible into subfolders.""" - _VALID_URL = r'^https?://(?P[a-z0-9]+)\.hosted\.panopto.com/Panopto/Pages/Sessions/List\.aspx#folderID=(?:"|%22)(?P[a-f0-9-]+)' + _VALID_URL = 
r'^https?://(?P[a-z0-9]+)\.hosted\.panopto.com/Panopto/Pages/Sessions/List\.aspx(?:\?.*)?#(?:.*&)?folderID=(?:"|%22)(?P[a-f0-9-]+)' _TESTS = [ { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%224540f269-8bb1-4352-b5dc-64e5919d1c40%22',