Merge remote-tracking branch 'origin/master'

2025-02-18 18:17:55 +01:00 · 2013-08-28 12:57:44 +02:00 · 2013-08-28 12:57:44 +02:00 · 204da0d3e3
commit 204da0d3e3
parent c496ca96e7 67b22dd036
2 changed files with 77 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -50,6 +50,7 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -0,0 +1,76 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    get_element_by_id,
 )
 class TechTVMITIE(InfoExtractor):
    """Extractor for videos hosted on techtv.mit.edu.

    Both ``/videos/<id>`` and ``/embeds/<id>`` URLs are accepted. The title
    and description are read from the video page; the media base URL, the
    list of available bitrates and the thumbnail are read from the embed
    page.
    """

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a techtv.mit.edu video.

        ``url`` must match ``_VALID_URL``; its numeric ``id`` group is used
        to request both the video page and the embed page.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``description`` and ``thumbnail``. The ``url`` points at the entry
        of the embed page's ``bitrates`` list with the highest ``bitrate``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The video page carries the human-readable metadata.
        webpage = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
        # The embed page carries the player configuration.
        embed_page = self._download_webpage(
            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
            note=u'Downloading embed page')

        # The dot in "cloudfront.net" is escaped so that only the literal
        # host name is accepted, not any character in that position.
        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront\.net/)',
            embed_page, u'base url')
        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
            u'video formats')
        formats = json.loads(formats_json)
        # Ascending sort by bitrate; the last entry is the best quality.
        best_format = sorted(formats, key=lambda f: f['bitrate'])[-1]

        title = get_element_by_id('edit-title', webpage)
        description = clean_html(get_element_by_id('edit-description', webpage))
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
            embed_page, u'thumbnail', flags=re.DOTALL)

        return {'id': video_id,
                'title': title,
                # Any 'mp4:' marker in the format path is stripped before it
                # is appended to the base URL.
                'url': base_url + best_format['url'].replace('mp4:', ''),
                'ext': 'mp4',
                'description': description,
                'thumbnail': thumbnail,
                }
 class MITIE(TechTVMITIE):
    """Extractor for ``video.mit.edu/watch/<title>`` pages.

    The watch page is downloaded, an iframe ``src`` is pulled out of it and
    the result is handed over as a URL result tagged ``ie='TechTVMIT'``.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a URL result pointing at the iframe embedded in ``url``.

        ``url`` must match ``_VALID_URL``; its ``title`` group is used as
        the identifier passed to the page download and in the status line.
        """
        slug = re.match(self._VALID_URL, url).group('title')
        watch_page = self._download_webpage(url, slug)

        status_line = '%s: Extracting %s url' % (slug, TechTVMITIE.IE_NAME)
        self.to_screen(status_line)

        iframe_src = self._search_regex(
            r'<iframe .*?src="(.+?)"', watch_page, u'embed url')
        return self.url_result(iframe_src, ie='TechTVMIT')