.+?<\/a>',
+ webpage, 'uploader_id', fatal=False
+ )
+ # CC licenses get an image with an appropriate alt text
+ license_img = get_element_by_id('mt_watch_license', webpage)
+ if license_img:
+ license = self._search_regex(
+ r'alt="(.+)"', license_img, 'license_img', fatal=False
+ )
+ if not license_img or not license:
+ # other licenses are just text
+ license = self._html_search_regex(
+ r'(.+)<\/div>',
+ webpage, 'license_text', fatal=False
+ )
+ upload_date = _date(self._html_search_regex(
+ r'
(.+?)<\/span>',
+ webpage, 'upload_date', fatal=False
+ ))
+ category = self._html_search_regex(
+ r'(.+)', webpage, 'category', fatal=False
+ )
+ tags_html = get_element_by_id('mt_watch_info_tag_list', webpage)
+ tags = _tags(tags_html)
return {
'id': video_id,
'title': title,
'description': description,
+ 'license': license,
+ 'categories': [category], # there's just one category per video
+ 'tags': tags,
'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
'thumbnail': thumbnail,
'formats': formats,
}
+
+
+def _date(str_containing_date):
+ """Parse the string 'at (M)M/(D)D/YYYY' to YYYYMMDD."""
+ return unified_strdate(str_containing_date.split(' ')[1], day_first=False)
+
+
+def _tags(tags_html):
+ """Parse the HTML markup containing the tags."""
+ matches = re.findall(r'(.+)<\/a>', tags_html)
+ return [match.rstrip(',') for match in matches]