# youtube-dl/youtube_dl/extractor/photobucket.py

from __future__ import unicode_literals

import datetime
import json
import re

from .common import InfoExtractor


class PhotobucketIE(InfoExtractor):
    """Extractor for flv/mp4 videos hosted on photobucket.com.

    The media metadata is read from a JSON object embedded in the page
    through a ``Pb.Data.Shared.put(Pb.Data.Shared.MEDIA, {...});`` call.
    """

    # The video id is whatever follows either a ``current=`` query parameter
    # or the last underscore before the ``.flv`` / ``.mp4`` extension.
    # Both http and https URLs are accepted.
    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    _TEST = {
        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
        'file': 'zpsc0c3b9fa.mp4',
        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
        'info_dict': {
            'upload_date': '20130504',
            'uploader': 'rachaneronas',
            'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the video info dictionary.

        Returns a dict with the keys ``id``, ``url``, ``uploader``,
        ``upload_date`` (``YYYYMMDD``), ``title``, ``ext`` and ``thumbnail``.

        Raises ``ValueError`` if the embedded metadata is not valid JSON and
        ``KeyError`` if it lacks one of the fields read below.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_extension = mobj.group('ext')

        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        info_json = self._search_regex(
            r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
            webpage, 'info json')
        info = json.loads(info_json)

        # Derive the upload date in UTC rather than in the local timezone of
        # the machine running the extractor; otherwise the reported date can
        # shift by one day depending on where the code runs.
        # Assumes creationDate is a Unix timestamp in seconds (the value was
        # previously passed straight to date.fromtimestamp).
        created = datetime.datetime(1970, 1, 1) + datetime.timedelta(
            seconds=info['creationDate'])

        return {
            'id': video_id,
            'url': info['downloadUrl'],
            'uploader': info['username'],
            'upload_date': created.strftime('%Y%m%d'),
            'title': info['title'],
            'ext': video_extension,
            'thumbnail': info['thumbUrl'],
        }
[photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00			`from __future__ import unicode_literals`

Move Photobucket into its own file 2013-06-23 20:12:18 +02:00			`import datetime`
			`import json`
			`import re`

			`from .common import InfoExtractor`


			`class PhotobucketIE(InfoExtractor):`
[photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00			`_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`_TEST = {`
			`u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',`
			`u'file': u'zpsc0c3b9fa.mp4',`
			`u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',`
			`u'info_dict': {`
[photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00			`'upload_date': '20130504',`
			`'uploader': 'rachaneronas',`
			`'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`}`
			`}`
Move Photobucket into its own file 2013-06-23 20:12:18 +02:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`video_extension = mobj.group('ext')`

			`webpage = self._download_webpage(url, video_id)`

			`# Extract URL, uploader, and title from webpage`
			`self.report_extraction(video_id)`
[photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00			`info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',`
			`webpage, 'info json')`
			`info = json.loads(info_json)`
			`return {`
			`'id': video_id,`
			`'url': info[u'downloadUrl'],`
			`'uploader': info[u'username'],`
			`'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),`
			`'title': info[u'title'],`
			`'ext': video_extension,`
			`'thumbnail': info[u'thumbUrl'],`
			`}`