From 8b348cf9ff8a1c4f3ef1ef4e31d0cf38404eea41 Mon Sep 17 00:00:00 2001 From: Hanif Birgani Date: Mon, 16 Mar 2020 11:33:18 +0330 Subject: [PATCH] Remove info extraction from json_ld Aparat serves invalid ld+json on some pages, which causes JSON parser errors for some URLs, so it is better to get the title and description from the og (Open Graph) meta tags instead of ld+json --- youtube_dl/extractor/aparat.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 6c83c8d28..dbaae7f71 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -20,11 +20,7 @@ class AparatIE(InfoExtractor): 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', - 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028', - 'duration': 231, - 'timestamp': 1387394859, - 'upload_date': '20131218', - 'view_count': int, + 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028' }, }, { # multiple formats @@ -80,14 +76,14 @@ class AparatIE(InfoExtractor): self._sort_formats( formats, field_preference=('height', 'width', 'tbr', 'format_id')) - info = self._search_json_ld(webpage, video_id, default={}) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) - if not info.get('title'): - info['title'] = options['title'] - - return merge_dicts(info, { + return { + 'title': title, + 'description': description, 'id': video_id, 'thumbnail': url_or_none(options.get('poster')), 'duration': int_or_none(options.get('duration')), 'formats': formats, - }) + }