
Added nicovideo search extractor

john 2018-09-08 13:46:40 -07:00
parent 2e4350eec6
commit 9ccab97034
3 changed files with 61 additions and 12 deletions

README.md

@@ -77,8 +77,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      repairs broken URLs, but emits an error if
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file
-                                     /etc/youtube-dl.conf: Do not read the user
+                                     in the global configuration file /etc
+                                     /youtube-dl.conf: Do not read the user
                                      configuration in ~/.config/youtube-
                                      dl/config (%APPDATA%/youtube-dl/config.txt
                                      on Windows)
@@ -108,8 +108,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      proxy specified by --proxy (or none, if the
                                      option is not present) is used for the
                                      actual downloading.
-    --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header
+    --geo-bypass                     Bypass geographic restriction via faking X
+                                     -Forwarded-For HTTP header
     --no-geo-bypass                  Do not bypass geographic restriction via
                                      faking X-Forwarded-For HTTP header
     --geo-bypass-country CODE        Force bypass geographic restriction with
@@ -258,12 +258,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      jar in
     --cache-dir DIR                  Location in the filesystem where youtube-dl
                                      can store some downloaded information
-                                     permanently. By default
-                                     $XDG_CACHE_HOME/youtube-dl or
-                                     ~/.cache/youtube-dl . At the moment, only
-                                     YouTube player files (for videos with
-                                     obfuscated signatures) are cached, but that
-                                     may change.
+                                     permanently. By default $XDG_CACHE_HOME
+                                     /youtube-dl or ~/.cache/youtube-dl . At the
+                                     moment, only YouTube player files (for
+                                     videos with obfuscated signatures) are
+                                     cached, but that may change.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
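
The options touched by these hunks combine in the obvious way on the command line; an illustrative invocation (the watch URL is just a placeholder):

    youtube-dl --ignore-config --geo-bypass-country JP \
        --cache-dir ~/.cache/youtube-dl "http://www.nicovideo.jp/watch/sm9"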

youtube_dl/extractor/extractors.py

@@ -730,7 +730,7 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
-from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE, NicovideoIE
 from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
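
Registering NicovideoIE here is all the wiring the new search key needs: SearchInfoExtractor (the base class in youtube_dl/extractor/common.py) builds a pseudo-URL pattern from _SEARCH_KEY and hands matching queries to _get_n_results. A condensed sketch of that dispatch, paraphrased from the base class rather than taken from this commit:

    class SearchInfoExtractor(InfoExtractor):
        @classmethod
        def _make_valid_url(cls):
            # Matches "nicosearch:q", "nicosearch25:q" and "nicosearchall:q"
            return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

        def _real_extract(self, query):
            mobj = re.match(self._make_valid_url(), query)
            prefix, query = mobj.group('prefix'), mobj.group('query')
            if prefix == '':
                return self._get_n_results(query, 1)  # bare key: one result
            elif prefix == 'all':
                return self._get_n_results(query, self._MAX_RESULTS)
            return self._get_n_results(query, int(prefix))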

youtube_dl/extractor/niconico.py

@@ -3,8 +3,9 @@ from __future__ import unicode_literals

 import json
 import datetime
+import re

-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_urlparse,
@@ -468,3 +470,56 @@ class NiconicoPlaylistIE(InfoExtractor):
             'id': list_id,
             'entries': entries,
         }
+
+
+# USAGE: youtube-dl "nicosearch<NUMBER OF ENTRIES>:<SEARCH STRING>"
+class NicovideoIE(SearchInfoExtractor):
+    IE_DESC = 'Nico video search'
+    _MAX_RESULTS = 100000
+    _SEARCH_KEY = 'nicosearch'
+
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query"""
+        entries = []
+        curr_date = datetime.datetime.now().date()
+        while len(entries) < n:
+            search_url = 'http://www.nicovideo.jp/search/%s?sort=f&order=d' % query
+            r = self._get_entries_for_date(search_url, query, curr_date)
+            # Did we gather more entries in the last few pages than were
+            # asked for? If so, only add as many as are needed to reach
+            # the desired number.
+            m = n - len(entries)
+            entries += r[:min(m, len(r))]
+            # For a given search, nicovideo will show a maximum of 50
+            # pages. The way around this is to restrict each search to a
+            # single day, which in practice keeps the result set below 50
+            # pages. Each day is scraped completely before moving on to
+            # the previous one, until as many entries as were requested
+            # have been found.
+            curr_date -= datetime.timedelta(days=1)
+            if curr_date < datetime.date(2007, 1, 1):
+                # Niconico launched in late 2006, so stop rather than
+                # walking back through empty days forever when a query
+                # cannot satisfy the requested number of results.
+                break
+        return {
+            '_type': 'playlist',
+            'id': query,
+            'entries': entries,
+        }
+
+    def _get_entries_for_date(self, url, query, date, page_num=1):
+        link = '%s&page=%s&start=%s&end=%s' % (url, page_num, date, date)
+        results = self._download_webpage(
+            link, query,
+            note='Downloading results page %s for date %s' % (page_num, date))
+        entries = []
+        r = re.findall(r'<a href="/watch/(..[0-9]{1,8})\?', results)
+        for item in r:
+            e = self.url_result('http://www.nicovideo.jp/watch/' + item, 'Niconico')
+            entries.append(e)
+        # Each page holds a maximum of 32 entries. If 32 were found on
+        # the current page, there may be another page, so check it too.
+        if len(r) >= 32:
+            entries += self._get_entries_for_date(url, query, date, page_num + 1)
+        return entries
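
With all three files in place, the new key behaves like any other youtube-dl search pseudo-URL. A quick usage sketch (the query string is illustrative, and results depend on what Niconico's index returns at download time):

    youtube-dl "nicosearch10:VOCALOID"     # first 10 matches
    youtube-dl "nicosearch:VOCALOID"       # bare key: first match only
    youtube-dl "nicosearchall:VOCALOID"    # everything, up to _MAX_RESULTS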