From c9c1c46d47c3dec8c01cae33dcd1fe7e640fdc2d Mon Sep 17 00:00:00 2001
From: David Roizenman
Date: Sun, 21 Jun 2020 03:00:17 -0700
Subject: [PATCH] [pinterest] Add new extractor

---
Review notes (text in this position is not part of the commit message):
- The line structure of this patch was rebuilt from a whitespace-collapsed
  copy; the diffstat and hunk header below reflect the revised file.
- The "index" blob id for pinterest.py was NOT recomputed for this revision.
- The "<id>" group name in _VALID_URL and the script-tag pattern passed to
  _search_regex were missing from that copy and have been reconstructed;
  the script-tag pattern in particular must be confirmed against a live
  pin page.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/pinterest.py  | 97 ++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 youtube_dl/extractor/pinterest.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..9c03c0a05 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -842,6 +842,7 @@ from .picarto import (
 )
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
+from .pinterest import PinterestIE
 from .pladform import PladformIE
 from .platzi import (
     PlatziIE,
diff --git a/youtube_dl/extractor/pinterest.py b/youtube_dl/extractor/pinterest.py
new file mode 100644
index 000000000..a4ed71b05
--- /dev/null
+++ b/youtube_dl/extractor/pinterest.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class PinterestIE(InfoExtractor):
+    """Extract the "V_HLSV4" video stream attached to a single Pinterest pin."""
+
+    # Dots inside the multi-part suffixes are escaped so they only match a
+    # literal "."; the named group "id" captures the numeric pin id.
+    _VALID_URL = (
+        r"https?://(?:www\.)?pinterest\."
+        r"(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es"
+        r"|com\.mx|dk|ph|biz|th|com\.pt|com\.uy|co|nl|info|kr|ie|vn|com\.vn"
+        r"|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|co\.id|com\.ec|com\.py"
+        r"|engineering|tw|be|uk|com\.bo|com\.pe)"
+        r"/pin/(?P<id>[0-9]+)"
+    )
+    _TEST = {
+        "url": "https://www.pinterest.ca/pin/585890232762351770",
+        "md5": "f51309dfca161c82a9cccb835ab10572",
+        "info_dict": {
+            "id": "585890232762351770",
+            "ext": "mp4",
+            "title": "Origami",
+            "thumbnail": "https://i.pinimg.com/videos/thumbnails/originals/12/83/f0/1283f06c1c8fa040011cd7231f33f069.0000001.jpg",
+            "uploader": "Sara Mashal",
+            "description": "This Pin was discovered by Sara Mashal. Discover (and save!) your own Pins on Pinterest.",
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the pin at *url*.
+
+        Raises ExtractorError when the page has no usable pin data or the
+        pin has no "V_HLSV4" stream URL.
+        """
+        video_id = self._match_id(url)
+        # Keep only the part of the URL the pattern matched, dropping any
+        # query string or trailing path after the pin id.
+        clean_url = re.search(self._VALID_URL, url).group(0)
+
+        webpage = self._download_webpage(clean_url, video_id)
+
+        # NOTE(review): the pattern literal was empty (r"") in the copy this
+        # patch was rebuilt from, so there was nothing to capture. This one
+        # assumes the pin data sits in the "initial-state" JSON script tag;
+        # TODO confirm against a live pin page.
+        pin_info_json = self._search_regex(
+            r'<script id="initial-state" type="application/json">(.+?)</script>',
+            webpage,
+            "Pin data JSON",
+        )
+        try:
+            pin_info_full = json.loads(pin_info_json)
+        except ValueError:
+            raise ExtractorError("Can't parse Pin data JSON")
+
+        pin_info = next(
+            (
+                r
+                for r in pin_info_full.get("resourceResponses") or []
+                if r.get("name") == "PinResource"
+            ),
+            None,
+        )
+        if not pin_info:
+            raise ExtractorError("Can't find Pin data")
+
+        # "or {}" (not a .get() default) so that keys which are present but
+        # null are treated like missing ones instead of raising.
+        pin_data = (pin_info.get("response") or {}).get("data") or {}
+        video_list = (pin_data.get("videos") or {}).get("video_list") or {}
+        video_data = video_list.get("V_HLSV4") or {}
+        video_url = video_data.get("url")
+        if not video_url:
+            raise ExtractorError("Can't find a video stream URL")
+
+        title = (pin_data.get("title") or "").strip() or "pinterest_video"
+        pinner = pin_data.get("pinner") or {}
+        uploader = pinner.get("full_name") or pinner.get("username")
+
+        return {
+            "id": video_id,
+            "title": title,
+            "description": self._og_search_description(webpage),
+            "uploader": uploader,
+            "url": video_url,
+            "ext": "mp4",
+            "manifest_url": video_url,
+            "thumbnail": video_data.get("thumbnail"),
+        }