2017-06-21 07:45:13 +02:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
|
|
|
from ..compat import compat_str
|
|
|
|
|
|
|
|
from ..utils import (
|
|
|
|
ExtractorError,
|
|
|
|
smuggle_url,
|
|
|
|
unsmuggle_url,
|
|
|
|
)
|
|
|
|
|
|
|
|
import re
|
|
|
|
from random import random
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
|
|
class PanoptoBaseIE(InfoExtractor):
    """The base class with common methods for Panopto extractors."""

    @classmethod
    def _match_organization(cls, url):
        """Match and return the organization part of a Panopto hosted URL.

        The URL is matched against the class's own ``_VALID_URL`` pattern and
        the value of its ``org`` named group is returned as a text string.

        Raises ExtractorError if the URL does not match ``_VALID_URL``.
        """
        # Cache the compiled pattern on the concrete class itself (hence the
        # cls.__dict__ lookup rather than hasattr), because each subclass
        # defines its own _VALID_URL.
        if '_VALID_URL_RE' not in cls.__dict__:
            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
        m = cls._VALID_URL_RE.match(url)
        # An explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would turn a bad URL into an opaque
        # AttributeError on `m.group` below.
        if m is None:
            raise ExtractorError('Invalid Panopto URL: {}'.format(url), expected=True)
        return compat_str(m.group('org'))
|
|
|
|
|
|
|
|
|
|
|
|
class PanoptoIE(PanoptoBaseIE):
    """Extracts a single Panopto video including all available streams."""

    # Every literal dot of the host name is escaped so that only
    # "<org>.hosted.panopto.com" matches.
    _VALID_URL = r'^https?://(?P<org>[a-z0-9]+)\.hosted\.panopto\.com/Panopto/Pages/Viewer\.aspx\?id=(?P<id>[a-f0-9-]+)'
    _TESTS = [
        {
            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
            'md5': '06fb292a3510aa5bc5f0c950fe58c9f7',
            'info_dict': {
                'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
                'ext': 'mp4',
                'title': 'Panopto for Business',
                'uploader': 'Ari Bixhorn',
                'upload_date': '20160328',
                'timestamp': 1459184200.3759995,
            },
        },
        {
            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
            'info_dict': {
                'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
                'title': 'Overcoming Top 4 Challenges of Enterprise Video',
                'uploader': 'Panopto Support',
                'timestamp': 1449409251.8579998,
            },
            'playlist': [
                {
                    'md5': 'e22b5a284789ba2681e4fe215352d816',
                    'info_dict': {
                        'id': '15ad06ef-3f7d-4074-aa4a-87c41dd18f9c',
                        'ext': 'mp4',
                        'title': 'OBJECT',
                    },
                },
                {
                    'md5': '4396cbff07e7b883ca522a6783dc6a70',
                    'info_dict': {
                        'id': '7668d6b2-dc81-421d-9853-20653689e2e8',
                        'ext': 'mp4',
                        'title': 'DV',
                    },
                },
            ],
            'playlist_count': 2,
        },
    ]

    @staticmethod
    def _get_contribs_str(contribs):
        """Return a comma-delimited string of contributor display names.

        `contribs` is an iterable of dicts; entries without a 'DisplayName'
        value are skipped. An empty string is returned when no name is found.
        """
        display_names = (c.get('DisplayName') for c in contribs)
        return ', '.join(
            '{}'.format(name) for name in display_names if name is not None)

    def _real_extract(self, url):
        """Extract the video and stream information for the given Panopto hosted URL.

        Returns an info dict carrying 'formats' when the delivery has exactly
        one usable stream, or a 'multi_video' result with one entry per stream
        otherwise.

        Raises ExtractorError if the API reports an error or if no stream with
        at least one format is found.
        """
        video_id = self._match_id(url)
        org = self._match_organization(url)

        delivery_info = self._download_json(
            'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/DeliveryInfo.aspx'.format(org),
            video_id,
            query={
                'deliveryId': video_id,
                'invocationId': '',
                'isLiveNotes': 'false',
                'refreshAuthCookie': 'true',
                'isActiveBroadcast': 'false',
                'isEditing': 'false',
                'isKollectiveAgentInstalled': 'false',
                'isEmbed': 'false',
                'responseType': 'json',
            }
        )

        if 'ErrorCode' in delivery_info:
            self._downloader.report_warning("If the video you are trying to download requires you to sign-in, you will "
                                            "need to provide a cookies file that allows the downloader to authenticate "
                                            "with Panopto. If the error below is about unauthorized access, this is "
                                            "most likely the issue.")
            raise ExtractorError(
                'API error: ({}) {}'.format(delivery_info.get('ErrorCode', '?'), delivery_info.get('ErrorMessage', '?'))
            )

        delivery = delivery_info['Delivery']

        streams = []
        for this_stream in delivery['Streams']:
            new_stream = {
                'id': this_stream['PublicID'],
                'title': this_stream['Tag'],
                'formats': [],
            }
            # Use .get() with a truthiness test rather than `in`: a key that is
            # present but null in the JSON must not produce a format without a
            # URL, nor be handed to the m3u8 extractor.
            http_url = this_stream.get('StreamHttpUrl')
            if http_url:
                new_stream['formats'].append({
                    'url': http_url,
                })
            hls_url = this_stream.get('StreamUrl')
            if hls_url:
                m3u8_formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
                # Only the HLS variants are sorted; the direct HTTP format, if
                # any, stays in front of them.
                self._sort_formats(m3u8_formats)
                new_stream['formats'].extend(m3u8_formats)
            # Streams that yielded no format at all are dropped.
            if new_stream['formats']:
                streams.append(new_stream)

        if not streams:
            raise ExtractorError('No streams found.')

        result = {
            'id': video_id,
            'title': delivery['SessionName'],
            # The `random` query value changes on every call.
            'thumbnail': 'https://{}.hosted.panopto.com/Panopto/Services/FrameGrabber.svc/FrameRedirect?objectId={}&mode=Delivery&random={}'.format(
                org, video_id, random()),
        }

        if len(streams) == 1:
            result['formats'] = streams[0]['formats']
        else:
            result['_type'] = 'multi_video'
            result['entries'] = streams

        contributors = delivery.get('Contributors')
        if contributors is not None:
            result['uploader'] = self._get_contribs_str(contributors)

        session_start_time = delivery.get('SessionStartTime')
        if session_start_time is not None:
            # NOTE(review): the constant presumably rebases Panopto's start
            # time onto the Unix epoch -- confirm against the API.
            result['timestamp'] = session_start_time - 11640000000

        duration = delivery.get('Duration')
        if duration is not None:
            result['duration'] = duration

        thumbnails = []
        # `or []` also covers a 'Timestamps' key that is present but null.
        for timestamp in delivery.get('Timestamps') or []:
            object_id = timestamp.get('ObjectIdentifier')
            object_sequence_num = timestamp.get('ObjectSequenceNumber')
            if object_id is not None and object_sequence_num is not None:
                thumbnails.append({
                    'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Image.aspx?id={}&number={}&x=undefined'.format(
                        org, object_id, object_sequence_num)
                })

            # This provides actual thumbnails instead of the above which allows for downloading of real slides
            # object_public_id = timestamp.get('ObjectPublicIdentifier')
            # session_id = timestamp.get('SessionID')
            # absolute_time = timestamp.get('AbsoluteTime')
            # if object_public_id is not None and session_id is not None and object_sequence_num is not None and absolute_time is not None:
            #     thumbnails.append({
            #         'url': 'https://{}.hosted.panopto.com/Panopto/Pages/Viewer/Thumb.aspx?eventTargetPID={}&sessionPID={}&number={}&isPrimary=false&absoluteTime={}'.format(
            #             org, object_public_id, session_id, object_sequence_num, absolute_time),
            #     })

        if thumbnails:
            if result.get('entries') is not None:
                # 'entries' is only set when there are at least two streams, so
                # index 1 always exists here.
                # NOTE(review): presumably the second stream is the slide or
                # screen stream the images belong to -- confirm.
                result['entries'][1]['thumbnails'] = thumbnails
            else:
                result['thumbnails'] = thumbnails

        return result
|
|
|
|
|
|
|
|
|
|
|
|
class PanoptoFolderIE(PanoptoBaseIE):
    """Recursively extracts a folder of Panopto videos, digging as far as possible into subfolders."""

    # Every literal dot of the host name is escaped so that only
    # "<org>.hosted.panopto.com" matches. The folder ID in the URL fragment is
    # wrapped in a double quote, either literal or percent-encoded (%22).
    _VALID_URL = r'^https?://(?P<org>[a-z0-9]+)\.hosted\.panopto\.com/Panopto/Pages/Sessions/List\.aspx#folderID=(?:"|%22)(?P<id>[a-f0-9-]+)'
    _TESTS = [
        {
            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%222a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f%22',
            'info_dict': {
                'id': '2a0546e0-c6c0-4ab1-bc79-5c0b0e801c4f',
                'title': 'End-to-End Demo',
            },
            'playlist': [
                {
                    'info_dict': {
                        'id': '70f7441d-01b5-4319-b399-6591e456b935',
                        # Fails before download with this line (it claims it needs an ext field)
                        # but fails after download when it's included because 'ext' should be None
                        'ext': 'a',
                        'title': 'b',
                    },
                    'playlist': [
                        {
                            'info_dict': {
                                'id': 'c',
                                'ext': 'd',
                                'title': 'e',
                            }
                        }
                    ],
                },
            ],
        },
    ]

    def _real_extract(self, url):
        """Recursively extract the video and stream information for the given Panopto hosted URL.

        Returns a playlist whose entries are 'url_transparent' results: one per
        video in the folder (handled by the 'Panopto' extractor) and one per
        subfolder (handled by this extractor again). The titles of parent
        folders are passed down through smuggled URL data and prefixed to every
        entry title, separated by ' -- '.

        Raises ExtractorError if the folder listing contains neither videos nor
        subfolders.
        """
        # Use an empty dict as default so that lookups below need no None check.
        url, smuggled = unsmuggle_url(url, {})
        folder_id = self._match_id(url)
        org = self._match_organization(url)

        folder_data = self._download_json(
            'https://{}.hosted.panopto.com/Panopto/Services/Data.svc/GetSessions'.format(org),
            folder_id,
            'Downloading folder listing',
            'Failed to download folder listing',
            data=json.dumps({
                'queryParameters': {
                    'query': None,
                    'sortColumn': 1,
                    'sortAscending': False,
                    'maxResults': 10000,
                    'page': 0,
                    'startDate': None,
                    'endDate': None,
                    'folderID': folder_id,
                    'bookmarked': False,
                    'getFolderData': True,
                    'isSharedWithMe': False,
                },
            }, ensure_ascii=False).encode('utf-8'),
            headers={'Content-Type': 'application/json'})['d']

        # Either list may be missing or null in the response; normalise both
        # once so that the loops and the title lookup below agree on that.
        results = folder_data.get('Results') or []
        subfolders = folder_data.get('Subfolders') or []

        entries = []
        for video in results:
            new_video = {
                'id': video['DeliveryID'],
                'title': video['SessionName'],
                'url': video['ViewerUrl'],
                '_type': 'url_transparent',
                'ie_key': 'Panopto',
            }
            if 'prev_folders' in smuggled:
                new_video['title'] = smuggled['prev_folders'] + ' -- ' + new_video['title']
            entries.append(new_video)

        for subfolder in subfolders:
            new_folder = {
                'id': subfolder['ID'],
                'title': subfolder['Name'],
                '_type': 'url_transparent',
                'ie_key': 'PanoptoFolder',
            }
            if 'prev_folders' in smuggled:
                new_folder['title'] = smuggled['prev_folders'] + ' -- ' + new_folder['title']
            # Hand the accumulated folder path down to the recursive extraction.
            new_folder['url'] = smuggle_url('https://{}.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID="{}"'
                                            .format(org, subfolder['ID']), {'prev_folders': new_folder['title']})
            entries.append(new_folder)

        if not entries:
            raise ExtractorError('Folder is empty or authentication failed')

        # `entries` is non-empty here, so at least one of the two lists has an
        # element. Previously a folder holding only subfolders crashed on
        # len() of a missing or null 'Results'.
        if results:
            title = results[0]['FolderName']
        else:
            title = subfolders[0]['ParentFolderName']

        return {
            'id': folder_id,
            'title': title,
            '_type': 'playlist',
            'entries': entries,
        }
|