diff --git a/youtube_dl/extractor/boundhub.py b/youtube_dl/extractor/boundhub.py new file mode 100644 index 000000000..7de8a6512 --- /dev/null +++ b/youtube_dl/extractor/boundhub.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class BoundHubIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?boundhub\.com/videos/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.boundhub.com/videos/205969/tamina-in-species-appropriate-cage-system-housing/', + 'md5': '6381e2491e6a42cc8e95c529b6da50a8', + 'info_dict': { + 'id': '205969', + 'title': 'Tamina in species-appropriate cage system housing', + 'description': 'Tamina in Straitjacket gagged an locked into a small cage for the afternoon.', + 'display_id': 'tamina-in-species-appropriate-cage-system-housing', + 'duration': 314, + 'ext': 'mp4', + 'thumbnail': 'https://cnt.boundhub.com/contents/videos_screenshots/205000/205969/preview.mp4.jpg', + 'uploader': 'Tamina', + 'uploader_id': 39278, + 'uploader_url': 'https://www.boundhub.com/members/39278/' + } + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, self._match_id(url)) + + # Parse duration + duration_text = self._search_regex(r'\s*Duration:\s*([\w ]*)', webpage, 'duration_text', fatal=False) + minutes = self._html_search_regex(r'(\d*)min', duration_text, 'minutes', fatal=False) + seconds = self._html_search_regex(r'(\d*)sec', duration_text, 'seconds', fatal=False) + + # Get uploader url + uploader_url = self._search_regex(r'\s*\s*

(.*)

', webpage, 'title', default=None) or self._og_search_title(webpage), + 'url': self._search_regex(r'video_url: [\"\']([^\"\']*)[\"\']', webpage, 'url'), + 'description': self._search_regex(r'\s*Description:\s*(.*)<\/em>', webpage, 'description', fatal=False), + 'display_id': self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/videos/[0-9]+/([\w-]*)', url, 'display_id', fatal=False), + 'duration': int_or_none((int(minutes) * 60) + int(seconds)), + 'ext': self._html_search_regex(r'postfix:\s*[\"\']\.([^\"\']*)[\"\']', webpage, 'ext', fatal=False), + 'thumbnail': self._html_search_regex(r'preview_url:\s*[\"\']([^\"\']*)[\"\']', webpage, 'thumbnail', fatal=False), + 'uploader': self._search_regex(r'\s*\s*(.*)\s*
', webpage, 'uploader', fatal=False), + 'uploader_id': int_or_none(self._html_search_regex(r'https?://(?:www\.)?boundhub\.com/members/(\d+)', uploader_url, 'uploader_id', fatal=False)), + 'uploader_url': uploader_url, + 'views': int_or_none(self._search_regex(r'\s*Views:\s*([\w ]*)', webpage, 'views_text', fatal=False).replace(' ', '')) + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ae7079a6a..1dce2a7a9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -121,6 +121,7 @@ from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bostonglobe import BostonGlobeIE +from .boundhub import BoundHubIE from .bpb import BpbIE from .br import ( BRIE,