Mirror of https://codeberg.org/polarisfm/youtube-dl, synced 2024-12-02 13:27:56 +01:00

Added nicovideo search extractor

parent 2e4350eec6
commit 9ccab97034

README.md (19 changed lines):
@@ -77,8 +77,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      repairs broken URLs, but emits an error if
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file
-                                     /etc/youtube-dl.conf: Do not read the user
+                                     in the global configuration file /etc
+                                     /youtube-dl.conf: Do not read the user
                                      configuration in ~/.config/youtube-
                                      dl/config (%APPDATA%/youtube-dl/config.txt
                                      on Windows)
@@ -108,8 +108,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      proxy specified by --proxy (or none, if the
                                      option is not present) is used for the
                                      actual downloading.
-    --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header
+    --geo-bypass                     Bypass geographic restriction via faking X
+                                     -Forwarded-For HTTP header
     --no-geo-bypass                  Do not bypass geographic restriction via
                                      faking X-Forwarded-For HTTP header
     --geo-bypass-country CODE        Force bypass geographic restriction with
@@ -258,12 +258,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      jar in
     --cache-dir DIR                  Location in the filesystem where youtube-dl
                                      can store some downloaded information
-                                     permanently. By default
-                                     $XDG_CACHE_HOME/youtube-dl or
-                                     ~/.cache/youtube-dl . At the moment, only
-                                     YouTube player files (for videos with
-                                     obfuscated signatures) are cached, but that
-                                     may change.
+                                     permanently. By default $XDG_CACHE_HOME
+                                     /youtube-dl or ~/.cache/youtube-dl . At the
+                                     moment, only YouTube player files (for
+                                     videos with obfuscated signatures) are
+                                     cached, but that may change.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
youtube_dl/extractor/extractors.py:

@@ -730,7 +730,7 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
-from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE, NicovideoIE
 from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
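
With the import registered in extractors.py, youtube-dl's extractor lookup picks the new class up automatically. A quick sanity check, as a sketch assuming a checkout with this patch applied (SearchInfoExtractor builds _VALID_URL from _SEARCH_KEY, so NicovideoIE claims nicosearch queries while plain watch URLs stay with NiconicoIE):

    from youtube_dl.extractor.niconico import NiconicoIE, NicovideoIE

    print(NicovideoIE.suitable('nicosearch5:hatsune miku'))           # True
    print(NicovideoIE.suitable('http://www.nicovideo.jp/watch/sm9'))  # False
    print(NiconicoIE.suitable('http://www.nicovideo.jp/watch/sm9'))   # True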

youtube_dl/extractor/niconico.py:

@@ -3,8 +3,9 @@ from __future__ import unicode_literals
 
 import json
 import datetime
+import re
 
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_urlparse,

@@ -468,3 +469,56 @@ class NiconicoPlaylistIE(InfoExtractor):
             'id': list_id,
             'entries': entries,
         }
+
+
+# USAGE: youtube-dl "nicosearch<NUMBER OF ENTRIES>:<SEARCH STRING>"
+class NicovideoIE(SearchInfoExtractor):
+    IE_DESC = 'Nico video search'
+    _MAX_RESULTS = 100000
+    _SEARCH_KEY = 'nicosearch'
+
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query"""
+        entries = []
+        curr_date = datetime.datetime.now().date()
+
+        while True:
+            search_url = 'http://www.nicovideo.jp/search/%s?sort=f&order=d' % query
+            r = self._get_entries_for_date(search_url, query, curr_date)
+
+            # Did we gather more entries in the last few pages than were asked
+            # for? If so, only add as many as are needed to reach the desired
+            # number.
+            m = n - len(entries)
+            entries += r[0:min(m, len(r))]
+
+            # For a given search, nicovideo will show a maximum of 50 pages.
+            # Specifying a date for the search, down to the day, all but
+            # guarantees that the number of result pages will not exceed 50.
+            # So we extract everything available for each day and step
+            # backwards until as many entries as requested have been found.
+            curr_date -= datetime.timedelta(days=1)
+            if len(entries) >= n:
+                break
+
+        return {
+            '_type': 'playlist',
+            'id': query,
+            'entries': entries,
+        }
+
+    def _get_entries_for_date(self, url, query, date, page_number=1):
+        link = url + '&page=' + str(page_number) + '&start=' + str(date) + '&end=' + str(date)
+        results = self._download_webpage(link, query, note='Downloading results page %s for date %s' % (page_number, date))
+        entries = []
+        r = re.findall(r'<a href="/watch/(..[0-9]{1,8})\?', results)
+
+        for item in r:
+            e = self.url_result('http://www.nicovideo.jp/watch/' + str(item), 'Niconico')
+            entries.append(e)
+
+        # Each page holds a maximum of 32 entries. If we've seen 32 entries on
+        # the current page, there may be another page, so check recursively.
+        if len(r) >= 32:
+            entries += self._get_entries_for_date(url, query, date, page_number + 1)
+        return entries
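
Per the # USAGE comment in the patch, the extractor is driven entirely through the nicosearch key. The count prefix follows youtube-dl's standard SearchInfoExtractor convention (an explicit number, nothing for a single result, or all, capped here by _MAX_RESULTS = 100000), so invocations look like:

    youtube-dl "nicosearch10:初音ミク"      # ten results for the query
    youtube-dl "nicosearch:VOCALOID"       # no count: a single result
    youtube-dl "nicosearchall:VOCALOID"    # everything, up to _MAX_RESULTS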
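
The date-window trick in _get_n_results deserves a standalone illustration: nicovideo serves at most 50 result pages per search (32 entries per page), so the patch scopes each request to a single day, which rarely exceeds the cap, and walks backwards one day at a time. Below is a minimal self-contained sketch of the same technique, assuming the markup that the patch's regex targets; fetch_page, collect_day and nico_search are hypothetical names, not youtube-dl API, and an extra guard is added so an exhausted query cannot loop forever:

    import datetime
    import re

    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2, which youtube-dl still supports


    def fetch_page(query, date, page):
        # One day-scoped results page, mirroring the URL built by the patch.
        url = ('http://www.nicovideo.jp/search/%s?sort=f&order=d'
               '&page=%d&start=%s&end=%s' % (query, page, date, date))
        return urlopen(url).read().decode('utf-8')


    def collect_day(query, date, page=1):
        # All watch IDs posted on a single day; a full page carries 32
        # entries, so 32 hits mean another page may follow.
        ids = re.findall(r'<a href="/watch/(..[0-9]{1,8})\?', fetch_page(query, date, page))
        if len(ids) >= 32:
            ids += collect_day(query, date, page + 1)
        return ids


    def nico_search(query, n, max_empty_days=30):
        entries, day, empty = [], datetime.date.today(), 0
        while len(entries) < n and empty < max_empty_days:
            found = collect_day(query, day)
            empty = 0 if found else empty + 1  # give up once results dry up
            entries += found[:n - len(entries)]
            day -= datetime.timedelta(days=1)  # one day back, stays under 50 pages
        return entries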