diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index e4d986f73..fce5755de 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import json
 import datetime
 import re
-import datetime
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
@@ -471,50 +470,52 @@ class NiconicoPlaylistIE(InfoExtractor):
             'entries': entries,
         }
 
-    #USAGE: youtube-dl "nicosearch<number>:<query>"
+
+# USAGE: youtube-dl "nicosearch<number>:<query>"
 class NicovideoIE(SearchInfoExtractor):
     IE_DESC = 'Nico video search'
     _MAX_RESULTS = 100000
     _SEARCH_KEY = 'nicosearch'
+
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
         entries = []
         currDate = datetime.datetime.now().date()
-
-        while True: 
+
+        while True:
             search_url = "http://www.nicovideo.jp/search/%s?sort=f&order=d" % (query)
             print(search_url)
             r = self._get_entries_for_date(search_url, query, currDate)
-            #did we gather more entries in the last few pages than were asked for? If so, only add as many as are needed to reach the desired number.
+            # did we gather more entries in the last few pages than were asked for? If so, only add as many as are needed to reach the desired number.
             m = n - len(entries)
             entries += r[0:min(m, len(r))]
-
-            #for a given search, nicovideo will show a maximum of 50 pages. My way around this is specifying a date for the search, down to the date, which for the most part
-            #is a guarantee that the number of pages in the search results will not exceed 50. For any given search for a day, we extract everything available, and move on, until
-            #finding as many entries as were requested.
+
+            # for a given search, nicovideo will show a maximum of 50 pages. My way around this is specifying a date for the search, down to the date, which for the most part
+            # is a guarantee that the number of pages in the search results will not exceed 50. For any given search for a day, we extract everything available, and move on, until
+            # finding as many entries as were requested.
             currDate -= datetime.timedelta(days=1)
 
             if(len(entries) >= n):
                 break
-
+
         return {
             '_type': 'playlist',
            'id': query,
             'entries': entries
-            }
+        }
 
-    def _get_entries_for_date(self, url, query, date, pageNumber = 1):
+    def _get_entries_for_date(self, url, query, date, pageNumber=1):
         link = url + "&page=" + str(pageNumber) + "&start=" + str(date) + "&end=" + str(date)
         results = self._download_webpage(link, query, note='Downloading results page %s for date %s' % (pageNumber, date))
         entries = []
-        r = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', results)
+        r = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', results)
         for item in r:
             e = self.url_result("http://www.nicovideo.jp/watch/" + str(item), 'Niconico')
             entries.append(e)
         if(len(r) >= 32):
             entries += self._get_entries_for_date(url, query, date, pageNumber + 1)
         return entries