mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-14 21:54:33 +01:00
commit
28bbb8e555
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.02**
|
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2019.07.02
|
[debug] youtube-dl version 2020.09.20
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.02**
|
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
@ -18,13 +18,13 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.02**
|
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.02**
|
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2019.07.02
|
[debug] youtube-dl version 2020.09.20
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.02**
|
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
13
.travis.yml
13
.travis.yml
@ -13,7 +13,7 @@ dist: trusty
|
|||||||
env:
|
env:
|
||||||
- YTDL_TEST_SET=core
|
- YTDL_TEST_SET=core
|
||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
matrix:
|
jobs:
|
||||||
include:
|
include:
|
||||||
- python: 3.7
|
- python: 3.7
|
||||||
dist: xenial
|
dist: xenial
|
||||||
@ -21,6 +21,12 @@ matrix:
|
|||||||
- python: 3.7
|
- python: 3.7
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=download
|
env: YTDL_TEST_SET=download
|
||||||
|
- python: 3.8
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.8
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
- python: 3.8-dev
|
- python: 3.8-dev
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=core
|
env: YTDL_TEST_SET=core
|
||||||
@ -29,6 +35,11 @@ matrix:
|
|||||||
env: YTDL_TEST_SET=download
|
env: YTDL_TEST_SET=download
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||||
|
- name: flake8
|
||||||
|
python: 3.8
|
||||||
|
dist: xenial
|
||||||
|
install: pip install flake8
|
||||||
|
script: flake8 .
|
||||||
fast_finish: true
|
fast_finish: true
|
||||||
allow_failures:
|
allow_failures:
|
||||||
- env: YTDL_TEST_SET=download
|
- env: YTDL_TEST_SET=download
|
||||||
|
@ -153,7 +153,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
@ -339,6 +339,72 @@ Incorrect:
|
|||||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Inline values
|
||||||
|
|
||||||
|
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
TITLE_RE = r'<title>([^<]+)</title>'
|
||||||
|
# ...some lines of code...
|
||||||
|
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Collapse fallbacks
|
||||||
|
|
||||||
|
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Good:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._html_search_meta(
|
||||||
|
['og:description', 'description', 'twitter:description'],
|
||||||
|
webpage, 'description', default=None)
|
||||||
|
```
|
||||||
|
|
||||||
|
Unwieldy:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = (
|
||||||
|
self._og_search_description(webpage, default=None)
|
||||||
|
or self._html_search_meta('description', webpage, default=None)
|
||||||
|
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||||
|
```
|
||||||
|
|
||||||
|
Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||||
|
|
||||||
|
### Trailing parentheses
|
||||||
|
|
||||||
|
Always move trailing parentheses after the last argument.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||||
|
list)
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||||
|
list,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
### Use convenience conversion and parsing functions
|
### Use convenience conversion and parsing functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
828
ChangeLog
828
ChangeLog
@ -1,3 +1,827 @@
|
|||||||
|
version 2020.09.20
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Relax interaction count extraction in _json_ld
|
||||||
|
+ [extractor/common] Extract author as uploader for VideoObject in _json_ld
|
||||||
|
* [downloader/hls] Fix incorrect end byte in Range HTTP header for
|
||||||
|
media segments with EXT-X-BYTERANGE (#14748, #24512)
|
||||||
|
* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601)
|
||||||
|
* [downloader/http] Improve timeout detection when reading block of data
|
||||||
|
(#10935)
|
||||||
|
* [downloader/http] Retry download when urlopen times out (#10935, #26603)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [redtube] Extend URL regular expression (#26506)
|
||||||
|
* [twitch] Refactor
|
||||||
|
* [twitch:stream] Switch to GraphQL and fix reruns (#26535)
|
||||||
|
+ [telequebec] Add support for brightcove videos (#25833)
|
||||||
|
* [pornhub] Extract metadata from JSON-LD (#26614)
|
||||||
|
* [pornhub] Fix view count extraction (#26621, #26614)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.09.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
|
||||||
|
(#25687, #25717)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [rtlnl] Extend URL regular expression (#26549, #25821)
|
||||||
|
* [youtube] Fix empty description extraction (#26575, #26006)
|
||||||
|
* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
|
||||||
|
* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
|
||||||
|
#26565)
|
||||||
|
* [svtplay] Fix id extraction (#26576)
|
||||||
|
* [redbulltv] Improve support for redbull.com TV localized URLs (#22063)
|
||||||
|
+ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
|
||||||
|
* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.09.06
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Recognize wav mimetype (#26463)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
|
||||||
|
* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
|
||||||
|
* [youtube:user] Extend URL regular expression (#26443)
|
||||||
|
* [xhamster] Improve initials regular expression (#26526, #26353)
|
||||||
|
* [svtplay] Fix video id extraction (#26425, #26428, #26438)
|
||||||
|
* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
|
||||||
|
#24263, #25010, #25553, #25606)
|
||||||
|
* Switch to GraphQL
|
||||||
|
+ Add support for collections
|
||||||
|
+ Add support for clips and collections playlists
|
||||||
|
* [biqle] Improve video ext extraction
|
||||||
|
* [xhamster] Fix extraction (#26157, #26254)
|
||||||
|
* [xhamster] Extend URL regular expression (#25789, #25804, #25927)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.07.28
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
|
||||||
|
* [youtube] Improve description extraction (#25937, #25980)
|
||||||
|
* [wistia] Restrict embed regular expression (#25969)
|
||||||
|
* [youtube] Prevent excess HTTP 301 (#25786)
|
||||||
|
+ [youtube:playlists] Extend URL regular expression (#25810)
|
||||||
|
+ [bellmedia] Add support for cp24.com clip URLs (#25764)
|
||||||
|
* [brightcove] Improve embed detection (#25674)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.16.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Force old layout (#25682, #25683, #25680, #25686)
|
||||||
|
* [youtube] Fix categories and improve tags extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.16
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix uploader id and uploader URL extraction
|
||||||
|
* [youtube] Improve view count extraction
|
||||||
|
* [youtube] Fix upload date extraction (#25677)
|
||||||
|
* [youtube] Fix thumbnails extraction (#25676)
|
||||||
|
* [youtube] Fix playlist and feed extraction (#25675)
|
||||||
|
+ [facebook] Add support for single-video ID links
|
||||||
|
+ [youtube] Extract chapters from JSON (#24819)
|
||||||
|
+ [kaltura] Add support for multiple embeds on a webpage (#25523)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.06.06
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [tele5] Bypass geo restriction
|
||||||
|
+ [jwplatform] Add support for bypass geo restriction
|
||||||
|
* [tele5] Prefer jwplatform over nexx (#25533)
|
||||||
|
* [twitch:stream] Expect 400 and 410 HTTP errors from API
|
||||||
|
* [twitch:stream] Fix extraction (#25528)
|
||||||
|
* [twitch] Fix thumbnails extraction (#25531)
|
||||||
|
+ [twitch] Pass v5 Accept HTTP header (#25531)
|
||||||
|
* [brightcove] Fix subtitles extraction (#25540)
|
||||||
|
+ [malltv] Add support for sk.mall.tv (#25445)
|
||||||
|
* [periscope] Fix untitled broadcasts (#25482)
|
||||||
|
* [jwplatform] Improve embeds extraction (#25467)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.29
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
|
||||||
|
* [utils] Fix file permissions in write_json_file (#12471, #25122)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [ard:beta] Extend URL regular expression (#25405)
|
||||||
|
+ [youtube] Add support for more invidious instances (#25417)
|
||||||
|
* [giantbomb] Extend URL regular expression (#25222)
|
||||||
|
* [ard] Improve URL regular expression (#25134, #25198)
|
||||||
|
* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
|
||||||
|
#25321)
|
||||||
|
* [indavideo] Switch to HTTPS for API request (#25191)
|
||||||
|
* [redtube] Improve title extraction (#25208)
|
||||||
|
* [vimeo] Improve format extraction and sorting (#25285)
|
||||||
|
* [soundcloud] Reduce API playlist page limit (#25274)
|
||||||
|
+ [youtube] Add support for yewtu.be (#25226)
|
||||||
|
* [mailru] Fix extraction (#24530, #25239)
|
||||||
|
* [bellator] Fix mgid extraction (#25195)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Request last data block of exact remaining size
|
||||||
|
* [downloader/http] Finish downloading once received data length matches
|
||||||
|
expected
|
||||||
|
* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
|
||||||
|
ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
|
||||||
|
+ [compat] Introduce compat_cookiejar_Cookie
|
||||||
|
* [utils] Improve cookie files support
|
||||||
|
+ Add support for UTF-8 in cookie files
|
||||||
|
* Skip malformed cookie file entries instead of crashing (invalid entry
|
||||||
|
length, invalid expires at)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Improve signature cipher extraction (#25187, #25188)
|
||||||
|
* [iprima] Improve extraction (#25138)
|
||||||
|
* [uol] Fix extraction (#22007)
|
||||||
|
+ [orf] Add support for more radio stations (#24938, #24968)
|
||||||
|
* [dailymotion] Fix typo
|
||||||
|
- [puhutv] Remove no longer available HTTP formats (#25124)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.05.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Extract multiple JSON-LD entries
|
||||||
|
* [options] Clarify doc on --exec command (#19087, #24883)
|
||||||
|
* [extractor/common] Skip malformed ISM manifest XMLs while extracting
|
||||||
|
ISM formats (#24667)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll] Fix and improve extraction (#25096, #25060)
|
||||||
|
* [youtube] Improve player id extraction
|
||||||
|
* [youtube] Use redirected video id if any (#25063)
|
||||||
|
* [yahoo] Fix GYAO Player extraction and relax URL regular expression
|
||||||
|
(#24178, #24778)
|
||||||
|
* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
|
||||||
|
* [tenplay] Relax URL regular expression (#25001)
|
||||||
|
+ [prosiebensat1] Extract series metadata
|
||||||
|
* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
|
||||||
|
- [prosiebensat1] Remove 7tv.de support (#24948)
|
||||||
|
* [youtube] Fix DRM videos detection (#24736)
|
||||||
|
* [thisoldhouse] Fix video id extraction (#24548, #24549)
|
||||||
|
+ [soundcloud] Extract AAC format (#19173, #24708)
|
||||||
|
* [youtube] Skip broken multifeed videos (#24711)
|
||||||
|
* [nova:embed] Fix extraction (#24700)
|
||||||
|
* [motherless] Fix extraction (#24699)
|
||||||
|
* [twitch:clips] Extend URL regular expression (#24290, #24642)
|
||||||
|
* [tv4] Fix ISM formats extraction (#24667)
|
||||||
|
* [tele5] Fix extraction (#24553)
|
||||||
|
+ [mofosex] Add support for generic embeds (#24633)
|
||||||
|
+ [youporn] Add support for generic embeds
|
||||||
|
+ [spankwire] Add support for generic embeds (#24633)
|
||||||
|
* [spankwire] Fix extraction (#18924, #20648)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
- [utils] Revert support for cookie files with spaces used instead of tabs
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [teachable] Update upskillcourses and gns3 domains
|
||||||
|
* [generic] Look for teachable embeds before wistia
|
||||||
|
+ [teachable] Extract chapter metadata (#24421)
|
||||||
|
+ [bilibili] Add support for player.bilibili.com (#24402)
|
||||||
|
+ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
|
||||||
|
* [limelight] Remove disabled API requests (#24255)
|
||||||
|
* [soundcloud] Fix download URL extraction (#24394)
|
||||||
|
+ [cbc:watch] Add support for authentication (#19160)
|
||||||
|
* [hellporno] Fix extraction (#24399)
|
||||||
|
* [xtube] Fix formats extraction (#24348)
|
||||||
|
* [ndr] Fix extraction (#24326)
|
||||||
|
* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
|
||||||
|
- [nhk] Remove obsolete rtmp formats (#24329)
|
||||||
|
* [nhk] Relax URL regular expression (#24329)
|
||||||
|
- [vimeo] Revert fix showcase password protected video extraction (#24224)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add support for cookie files with spaces used instead of tabs
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [pornhub] Add support for pornhubpremium.com (#24288)
|
||||||
|
- [youtube] Remove outdated code and unnecessary requests
|
||||||
|
* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
|
||||||
|
* [nhk] Update API version (#24270)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.06
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix age-gated videos support without login (#24248)
|
||||||
|
* [vimeo] Fix showcase password protected video extraction (#24224)
|
||||||
|
* [pornhub] Improve title extraction (#24184)
|
||||||
|
* [peertube] Improve extraction (#23657)
|
||||||
|
+ [servus] Add support for new URL schema (#23475, #23583, #24142)
|
||||||
|
* [vimeo] Fix subtitles URLs (#24209)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Force redirect URL to unicode on python 2
|
||||||
|
- [options] Remove duplicate short option -v for --version (#24162)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [xhamster] Fix extraction (#24205)
|
||||||
|
* [franceculture] Fix extraction (#24204)
|
||||||
|
+ [telecinco] Add support for article opening videos
|
||||||
|
* [telecinco] Fix extraction (#24195)
|
||||||
|
* [xtube] Fix metadata extraction (#21073, #22455)
|
||||||
|
* [youjizz] Fix extraction (#24181)
|
||||||
|
- Remove no longer needed compat_str around geturl
|
||||||
|
* [pornhd] Fix extraction (#24128)
|
||||||
|
+ [teachable] Add support for multiple videos per lecture (#24101)
|
||||||
|
+ [wistia] Add support for multiple generic embeds (#8347, #11385)
|
||||||
|
* [imdb] Fix extraction (#23443)
|
||||||
|
* [tv2dk:bornholm:play] Fix extraction (#24076)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.02.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
|
||||||
|
#10622)
|
||||||
|
* [update] Fix updating via symlinks (#23991)
|
||||||
|
+ [compat] Introduce compat_realpath (#23991)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [npr] Add support for streams (#24042)
|
||||||
|
+ [24video] Add support for porn.24video.net (#23779, #23784)
|
||||||
|
- [jpopsuki] Remove extractor (#23858)
|
||||||
|
* [nova] Improve extraction (#23690)
|
||||||
|
* [nova:embed] Improve (#23690)
|
||||||
|
* [nova:embed] Fix extraction (#23672)
|
||||||
|
+ [abc:iview] Add support for 720p (#22907, #22921)
|
||||||
|
* [nytimes] Improve format sorting (#24010)
|
||||||
|
+ [toggle] Add support for mewatch.sg (#23895, #23930)
|
||||||
|
* [thisoldhouse] Fix extraction (#23951)
|
||||||
|
+ [popcorntimes] Add support for popcorntimes.tv (#23949)
|
||||||
|
* [sportdeutschland] Update to new API
|
||||||
|
* [twitch:stream] Lowercase channel id for stream request (#23917)
|
||||||
|
* [tv5mondeplus] Fix extraction (#23907, #23911)
|
||||||
|
* [tva] Relax URL regular expression (#23903)
|
||||||
|
* [vimeo] Fix album extraction (#23864)
|
||||||
|
* [viewlift] Improve extraction
|
||||||
|
* Fix extraction (#23851)
|
||||||
|
+ Add support for authentication
|
||||||
|
+ Add support for more domains
|
||||||
|
* [svt] Fix series extraction (#22297)
|
||||||
|
* [svt] Fix article extraction (#22897, #22919)
|
||||||
|
* [soundcloud] Improve private playlist/set tracks extraction (#3707)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.01.24
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix sigfunc name extraction (#23819)
|
||||||
|
* [stretchinternet] Fix extraction (#4319)
|
||||||
|
* [voicerepublic] Fix extraction
|
||||||
|
* [azmedien] Fix extraction (#23783)
|
||||||
|
* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
|
||||||
|
+ [24video] Add support for 24video.vip (#23753)
|
||||||
|
* [ivi:compilation] Fix entries extraction (#23770)
|
||||||
|
* [ard] Improve extraction (#23761)
|
||||||
|
* Simplify extraction
|
||||||
|
+ Extract age limit and series
|
||||||
|
* Bypass geo-restriction
|
||||||
|
+ [nbc] Add support for nbc multi network URLs (#23049)
|
||||||
|
* [americastestkitchen] Fix extraction
|
||||||
|
* [zype] Improve extraction
|
||||||
|
+ Extract subtitles (#21258)
|
||||||
|
+ Support URLs with alternative keys/tokens (#21258)
|
||||||
|
+ Extract more metadata
|
||||||
|
* [orf:tvthek] Improve geo restricted videos detection (#23741)
|
||||||
|
* [soundcloud] Restore previews extraction (#23739)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.01.15
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [yourporn] Fix extraction (#21645, #22255, #23459)
|
||||||
|
+ [canvas] Add support for new API endpoint (#17680, #18629)
|
||||||
|
* [ndr:base:embed] Improve thumbnails extraction (#23731)
|
||||||
|
+ [vodplatform] Add support for embed.kwikmotion.com domain
|
||||||
|
+ [twitter] Add support for promo_video_website cards (#23711)
|
||||||
|
* [orf:radio] Clean description and improve extraction
|
||||||
|
* [orf:fm4] Fix extraction (#23599)
|
||||||
|
* [safari] Fix kaltura session extraction (#23679, #23670)
|
||||||
|
* [lego] Fix extraction and extract subtitle (#23687)
|
||||||
|
* [cloudflarestream] Improve extraction
|
||||||
|
+ Add support for bytehighway.net domain
|
||||||
|
+ Add support for signed URLs
|
||||||
|
+ Extract thumbnail
|
||||||
|
* [naver] Improve extraction
|
||||||
|
* Improve geo-restriction handling
|
||||||
|
+ Extract automatic captions
|
||||||
|
+ Extract uploader metadata
|
||||||
|
+ Extract VLive HLS formats
|
||||||
|
* Improve metadata extraction
|
||||||
|
- [pandatv] Remove extractor (#23630)
|
||||||
|
* [dctp] Fix format extraction (#23656)
|
||||||
|
+ [scrippsnetworks] Add support for www.discovery.com videos
|
||||||
|
* [discovery] Fix anonymous token extraction (#23650)
|
||||||
|
* [nrktv:seriebase] Fix extraction (#23625, #23537)
|
||||||
|
* [wistia] Improve format extraction and extract subtitles (#22590)
|
||||||
|
* [vice] Improve extraction (#23631)
|
||||||
|
* [redtube] Detect private videos (#23518)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.01.01
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [brightcove] Invalidate policy key cache on failing requests
|
||||||
|
* [pornhub] Improve locked videos detection (#22449, #22780)
|
||||||
|
+ [pornhub] Add support for m3u8 formats
|
||||||
|
* [pornhub] Fix extraction (#22749, #23082)
|
||||||
|
* [brightcove] Update policy key on failing requests
|
||||||
|
* [spankbang] Improve removed video detection (#23423)
|
||||||
|
* [spankbang] Fix extraction (#23307, #23423, #23444)
|
||||||
|
* [soundcloud] Automatically update client id on failing requests
|
||||||
|
* [prosiebensat1] Improve geo restriction handling (#23571)
|
||||||
|
* [brightcove] Cache brightcove player policy keys
|
||||||
|
* [teachable] Fail with error message if no video URL found
|
||||||
|
* [teachable] Improve locked lessons detection (#23528)
|
||||||
|
+ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
|
||||||
|
* [mitele] Fix extraction (#21354, #23456)
|
||||||
|
* [soundcloud] Update client id (#23516)
|
||||||
|
* [mailru] Relax URL regular expressions (#23509)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.12.25
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve str_to_int
|
||||||
|
+ [downloader/hls] Add ability to override AES decryption key URL (#17521)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [mediaset] Fix parse formats (#23508)
|
||||||
|
+ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
|
||||||
|
+ [slideslive] Add support for url and vimeo service names (#23414)
|
||||||
|
* [slideslive] Fix extraction (#23413)
|
||||||
|
* [twitch:clips] Fix extraction (#23375)
|
||||||
|
+ [soundcloud] Add support for token protected embeds (#18954)
|
||||||
|
* [vk] Improve extraction
|
||||||
|
* Fix User Videos extraction (#23356)
|
||||||
|
* Extract all videos for lists with more than 1000 videos (#23356)
|
||||||
|
+ Add support for video albums (#14327, #14492)
|
||||||
|
- [kontrtube] Remove extractor
|
||||||
|
- [videopremium] Remove extractor
|
||||||
|
- [musicplayon] Remove extractor (#9225)
|
||||||
|
+ [ufctv] Add support for ufcfightpass.imgdge.com and
|
||||||
|
ufcfightpass.imggaming.com (#23343)
|
||||||
|
+ [twitch] Extract m3u8 formats frame rate (#23333)
|
||||||
|
+ [imggaming] Add support for playlists and extract subtitles
|
||||||
|
+ [ufcarabia] Add support for UFC Arabia (#23312)
|
||||||
|
* [ufctv] Fix extraction
|
||||||
|
* [yahoo] Fix gyao brightcove player id (#23303)
|
||||||
|
* [vzaar] Override AES decryption key URL (#17521)
|
||||||
|
+ [vzaar] Add support for AES HLS manifests (#17521, #23299)
|
||||||
|
* [nrl] Fix extraction
|
||||||
|
* [teachingchannel] Fix extraction
|
||||||
|
* [nintendo] Fix extraction and partially add support for Nintendo Direct
|
||||||
|
videos (#4592)
|
||||||
|
+ [ooyala] Add better fallback values for domain and streams variables
|
||||||
|
+ [youtube] Add support for youtubekids.com (#23272)
|
||||||
|
* [tv2] Detect DRM protection
|
||||||
|
+ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
|
||||||
|
* [tv2] Fix tv2.no article extraction
|
||||||
|
* [msn] Improve extraction
|
||||||
|
+ Add support for YouTube and NBCSports embeds
|
||||||
|
+ Add support for articles with multiple videos
|
||||||
|
* Improve AOL embed support
|
||||||
|
* Improve format extraction
|
||||||
|
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
||||||
|
(#18014)
|
||||||
|
* [channel9] Reduce response size
|
||||||
|
* [adobetv] Improve extraction
|
||||||
|
* Use OnDemandPagedList for list extractors
|
||||||
|
* Reduce show extraction requests
|
||||||
|
* Extract original video format and subtitles
|
||||||
|
+ Add support for adobe tv embeds
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.11.28
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add generic caesar cipher and rot47
|
||||||
|
* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vimeo] Improve extraction
|
||||||
|
* Fix review extraction
|
||||||
|
* Fix ondemand extraction
|
||||||
|
* Make password protected player case as an expected error (#22896)
|
||||||
|
* Simplify channel based extractors code
|
||||||
|
- [openload] Remove extractor (#11999)
|
||||||
|
- [verystream] Remove extractor
|
||||||
|
- [streamango] Remove extractor (#15406)
|
||||||
|
* [dailymotion] Improve extraction
|
||||||
|
* Extract http formats included in m3u8 manifest
|
||||||
|
* Fix user extraction (#3553, #21415)
|
||||||
|
+ Add support for User Authentication (#11491)
|
||||||
|
* Fix password protected videos extraction (#23176)
|
||||||
|
* Respect age limit option and family filter cookie value (#18437)
|
||||||
|
* Handle video url playlist query param
|
||||||
|
* Report allowed countries for geo-restricted videos
|
||||||
|
* [corus] Improve extraction
|
||||||
|
+ Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
|
||||||
|
and disneylachaine.ca (#20861)
|
||||||
|
+ Add support for self hosted videos (#22075)
|
||||||
|
* Detect DRM protection (#14910, #9164)
|
||||||
|
* [vivo] Fix extraction (#22328, #22279)
|
||||||
|
+ [bitchute] Extract upload date (#22990, #23193)
|
||||||
|
* [soundcloud] Update client id (#23214)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.11.22
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Clean jwplayer description HTML tags
|
||||||
|
+ [extractor/common] Add data, headers and query to all major extract formats
|
||||||
|
methods
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [chaturbate] Fix extraction (#23010, #23012)
|
||||||
|
+ [ntvru] Add support for non relative file URLs (#23140)
|
||||||
|
* [vk] Fix wall audio thumbnails extraction (#23135)
|
||||||
|
* [ivi] Fix format extraction (#21991)
|
||||||
|
- [comcarcoff] Remove extractor
|
||||||
|
+ [drtv] Add support for new URL schema (#23059)
|
||||||
|
+ [nexx] Add support for Multi Player JS Setup (#23052)
|
||||||
|
+ [teamcoco] Add support for new videos (#23054)
|
||||||
|
* [soundcloud] Check if the soundtrack has downloads left (#23045)
|
||||||
|
* [facebook] Fix posts video data extraction (#22473)
|
||||||
|
- [addanime] Remove extractor
|
||||||
|
- [minhateca] Remove extractor
|
||||||
|
- [daisuki] Remove extractor
|
||||||
|
* [seeker] Fix extraction
|
||||||
|
- [revision3] Remove extractors
|
||||||
|
* [twitch] Fix video comments URL (#18593, #15828)
|
||||||
|
* [twitter] Improve extraction
|
||||||
|
+ Add support for generic embeds (#22168)
|
||||||
|
* Always extract http formats for native videos (#14934)
|
||||||
|
+ Add support for Twitter Broadcasts (#21369)
|
||||||
|
+ Extract more metadata
|
||||||
|
* Improve VMap format extraction
|
||||||
|
* Unify extraction code for both twitter statuses and cards
|
||||||
|
+ [twitch] Add support for Clip embed URLs
|
||||||
|
* [lnkgo] Fix extraction (#16834)
|
||||||
|
* [mixcloud] Improve extraction
|
||||||
|
* Improve metadata extraction (#11721)
|
||||||
|
* Fix playlist extraction (#22378)
|
||||||
|
* Fix user mixes extraction (#15197, #17865)
|
||||||
|
+ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
|
||||||
|
* [onionstudios] Fix extraction
|
||||||
|
+ [hotstar] Pass Referer header to format requests (#22836)
|
||||||
|
* [dplay] Minimize response size
|
||||||
|
+ [patreon] Extract uploader_id and filesize
|
||||||
|
* [patreon] Minimize response size
|
||||||
|
* [roosterteeth] Fix login request (#16094, #22689)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.11.05
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [scte] Add support for learning.scte.org (#22975)
|
||||||
|
+ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
|
||||||
|
* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
|
||||||
|
* [jamendo] Improve extraction
|
||||||
|
* Fix album extraction (#18564)
|
||||||
|
* Improve metadata extraction (#18565, #21379)
|
||||||
|
* [mediaset] Relax URL guid matching (#18352)
|
||||||
|
+ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
|
||||||
|
* [telegraaf] Fix extraction
|
||||||
|
+ [bellmedia] Add support for marilyn.ca videos (#22193)
|
||||||
|
* [stv] Fix extraction (#22928)
|
||||||
|
- [iconosquare] Remove extractor
|
||||||
|
- [keek] Remove extractor
|
||||||
|
- [gameone] Remove extractor (#21778)
|
||||||
|
- [flipagram] Remove extractor
|
||||||
|
- [bambuser] Remove extractor
|
||||||
|
* [wistia] Reduce embed extraction false positives
|
||||||
|
+ [wistia] Add support for inline embeds (#22931)
|
||||||
|
- [go90] Remove extractor
|
||||||
|
* [kakao] Remove raw request
|
||||||
|
+ [kakao] Extract format total bitrate
|
||||||
|
* [daum] Fix VOD and Clip extraction (#15015)
|
||||||
|
* [kakao] Improve extraction
|
||||||
|
+ Add support for embed URLs
|
||||||
|
+ Add support for Kakao Legacy vid based embed URLs
|
||||||
|
* Only extract fields used for extraction
|
||||||
|
* Strip description and extract tags
|
||||||
|
* [mixcloud] Fix cloudcast data extraction (#22821)
|
||||||
|
* [yahoo] Improve extraction
|
||||||
|
+ Add support for live streams (#3597, #3779, #22178)
|
||||||
|
* Bypass cookie consent page for european domains (#16948, #22576)
|
||||||
|
+ Add generic support for embeds (#20332)
|
||||||
|
* [tv2] Fix and improve extraction (#22787)
|
||||||
|
+ [tv2dk] Add support for TV2 DK sites
|
||||||
|
* [onet] Improve extraction …
|
||||||
|
+ Add support for onet100.vod.pl
|
||||||
|
+ Extract m3u8 formats
|
||||||
|
* Correct audio only format info
|
||||||
|
* [fox9] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.10.29
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Actualize major IPv4 address blocks per country
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [go] Add support for abc.com and freeform.com (#22823, #22864)
|
||||||
|
+ [mtv] Add support for mtvjapan.com
|
||||||
|
* [mtv] Fix extraction for mtv.de (#22113)
|
||||||
|
* [videodetective] Fix extraction
|
||||||
|
* [internetvideoarchive] Fix extraction
|
||||||
|
* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
|
||||||
|
- [hark] Remove extractor
|
||||||
|
- [tutv] Remove extractor
|
||||||
|
- [learnr] Remove extractor
|
||||||
|
- [macgamestore] Remove extractor
|
||||||
|
* [la7] Update Kaltura service URL (#22358)
|
||||||
|
* [thesun] Fix extraction (#16966)
|
||||||
|
- [makertv] Remove extractor
|
||||||
|
+ [tenplay] Add support for 10play.com.au (#21446)
|
||||||
|
* [soundcloud] Improve extraction
|
||||||
|
* Improve format extraction (#22123)
|
||||||
|
+ Extract uploader_id and uploader_url (#21916)
|
||||||
|
+ Extract all known thumbnails (#19071, #20659)
|
||||||
|
* Fix extraction for private playlists (#20976)
|
||||||
|
+ Add support for playlist embeds (#20976)
|
||||||
|
* Skip preview formats (#22806)
|
||||||
|
* [dplay] Improve extraction
|
||||||
|
+ Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
|
||||||
|
* Fix it.dplay.com extraction (#22826)
|
||||||
|
+ Extract creator, tags and thumbnails
|
||||||
|
* Handle playback API call errors
|
||||||
|
+ [discoverynetworks] Add support for dplay.co.uk
|
||||||
|
* [vk] Improve extraction
|
||||||
|
+ Add support for Odnoklassniki embeds
|
||||||
|
+ Extract more videos from user lists (#4470)
|
||||||
|
+ Fix wall post audio extraction (#18332)
|
||||||
|
* Improve error detection (#22568)
|
||||||
|
+ [odnoklassniki] Add support for embeds
|
||||||
|
* [puhutv] Improve extraction
|
||||||
|
* Fix subtitles extraction
|
||||||
|
* Transform HLS URLs to HTTP URLs
|
||||||
|
* Improve metadata extraction
|
||||||
|
* [ceskatelevize] Skip DRM media
|
||||||
|
+ [facebook] Extract subtitles (#22777)
|
||||||
|
* [globo] Handle alternative hash signing method
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.10.22
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve subtitles_filename (#22753)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [facebook] Bypass download rate limits (#21018)
|
||||||
|
+ [contv] Add support for contv.com
|
||||||
|
- [viewster] Remove extractor
|
||||||
|
* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
|
||||||
|
* Update the list of domains
|
||||||
|
+ Add support for aa-encoded video data
|
||||||
|
* Improve jwplayer format extraction
|
||||||
|
+ Add support for Clappr sources
|
||||||
|
* [mangomolo] Fix video format extraction and add support for player URLs
|
||||||
|
* [audioboom] Improve metadata extraction
|
||||||
|
* [twitch] Update VOD URL matching (#22395, #22727)
|
||||||
|
- [mit] Remove support for video.mit.edu (#22403)
|
||||||
|
- [servingsys] Remove extractor (#22639)
|
||||||
|
* [dumpert] Fix extraction (#22428, #22564)
|
||||||
|
* [atresplayer] Fix extraction (#16277, #16716)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.10.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Make _is_valid_url more relaxed
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vimeo] Improve album videos id extraction (#22599)
|
||||||
|
+ [globo] Extract subtitles (#22713)
|
||||||
|
* [bokecc] Improve player params extraction (#22638)
|
||||||
|
* [nexx] Handle result list (#22666)
|
||||||
|
* [vimeo] Fix VHX embed extraction
|
||||||
|
* [nbc] Switch to graphql API (#18581, #22693, #22701)
|
||||||
|
- [vessel] Remove extractor
|
||||||
|
- [promptfile] Remove extractor (#6239)
|
||||||
|
* [kaltura] Fix service URL extraction (#22658)
|
||||||
|
* [kaltura] Fix embed info strip (#22658)
|
||||||
|
* [globo] Fix format extraction (#20319)
|
||||||
|
* [redtube] Improve metadata extraction (#22492, #22615)
|
||||||
|
* [pornhub:uservideos:upload] Fix extraction (#22619)
|
||||||
|
+ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
|
||||||
|
- [wimp] Remove extractor (#22088, #22091)
|
||||||
|
+ [gfycat] Extend URL regular expression (#22225)
|
||||||
|
+ [chaturbate] Extend URL regular expression (#22309)
|
||||||
|
* [peertube] Update instances (#22414)
|
||||||
|
+ [telequebec] Add support for coucou.telequebec.tv (#22482)
|
||||||
|
+ [xvideos] Extend URL regular expression (#22471)
|
||||||
|
- [youtube] Remove support for invidious.enkirton.net (#22543)
|
||||||
|
+ [openload] Add support for oload.monster (#22592)
|
||||||
|
* [nrktv:seriebase] Fix extraction (#22596)
|
||||||
|
+ [youtube] Add support for yt.lelux.fi (#22597)
|
||||||
|
* [orf:tvthek] Make manifest requests non fatal (#22578)
|
||||||
|
* [teachable] Skip login when already logged in (#22572)
|
||||||
|
* [viewlift] Improve extraction (#22545)
|
||||||
|
* [nonktube] Fix extraction (#22544)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.09.28
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vk] Fix extraction (#22522)
|
||||||
|
* [heise] Fix kaltura embeds extraction (#22514)
|
||||||
|
* [ted] Check for resources validity and extract subtitled downloads (#22513)
|
||||||
|
+ [youtube] Add support for
|
||||||
|
owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
|
||||||
|
+ [nhk] Add support for clips
|
||||||
|
* [nhk] Fix video extraction (#22249, #22353)
|
||||||
|
* [byutv] Fix extraction (#22070)
|
||||||
|
+ [openload] Add support for oload.online (#22304)
|
||||||
|
+ [youtube] Add support for invidious.drycat.fr (#22451)
|
||||||
|
* [jwplatform] Do not match video URLs (#20596, #22148)
|
||||||
|
* [youtube:playlist] Unescape playlist uploader (#22483)
|
||||||
|
+ [bilibili] Add support for audio albums and songs (#21094)
|
||||||
|
+ [instagram] Add support for tv URLs
|
||||||
|
+ [mixcloud] Allow uppercase letters in format URLs (#19280)
|
||||||
|
* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
|
||||||
|
#12842, #13912, #15669, #16303)
|
||||||
|
* [hotstar] Use native HLS downloader by default
|
||||||
|
+ [hotstar] Extract more formats (#22323)
|
||||||
|
* [9now] Fix extraction (#22361)
|
||||||
|
* [zdf] Bypass geo restriction
|
||||||
|
+ [tv4] Extract series metadata
|
||||||
|
* [tv4] Fix extraction (#22443)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.09.12.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Remove quality and tbr for itag 43 (#22372)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.09.12
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Quick extraction tempfix (#22367, #22163)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.09.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
|
||||||
|
#21859)
|
||||||
|
+ [downloader/external] Respect mtime option for aria2c (#22242)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [xhamster:user] Add support for user pages (#16330, #18454)
|
||||||
|
+ [xhamster] Add support for more domains
|
||||||
|
+ [verystream] Add support for woof.tube (#22217)
|
||||||
|
+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
|
||||||
|
+ [openload] Add support for oload.vip (#22205)
|
||||||
|
+ [bbccouk] Extend URL regular expression (#19200)
|
||||||
|
+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
|
||||||
|
* [safari] Fix authentication (#22161, #22184)
|
||||||
|
* [usanetwork] Fix extraction (#22105)
|
||||||
|
+ [einthusan] Add support for einthusan.ca (#22171)
|
||||||
|
* [youtube] Improve unavailable message extraction (#22117)
|
||||||
|
+ [piksel] Extract subtitles (#20506)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.08.13
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
|
||||||
|
* [YoutubeDL] Check annotations availability (#18582)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube:playlist] Improve flat extraction (#21927)
|
||||||
|
* [youtube] Fix annotations extraction (#22045)
|
||||||
|
+ [discovery] Extract series meta field (#21808)
|
||||||
|
* [youtube] Improve error detection (#16445)
|
||||||
|
* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
|
||||||
|
+ [roosterteeth] Add support for watch URLs
|
||||||
|
* [discovery] Limit video data by show slug (#21980)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.08.02
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [tvigle] Add support for HLS and DASH formats (#21967)
|
||||||
|
* [tvigle] Fix extraction (#21967)
|
||||||
|
+ [yandexvideo] Add support for DASH formats (#21971)
|
||||||
|
* [discovery] Use API call for video data extraction (#21808)
|
||||||
|
+ [mgtv] Extract format_note (#21881)
|
||||||
|
* [tvn24] Fix metadata extraction (#21833, #21834)
|
||||||
|
* [dlive] Relax URL regular expression (#21909)
|
||||||
|
+ [openload] Add support for oload.best (#21913)
|
||||||
|
* [youtube] Improve metadata extraction for age gate content (#21943)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.07.30
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix and improve title and description extraction (#21934)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.07.27
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
|
||||||
|
+ [discovery] Add support for go.discovery.com URLs
|
||||||
|
* [youtube:playlist] Relax video regular expression (#21844)
|
||||||
|
* [generic] Restrict --default-search schemeless URLs detection pattern
|
||||||
|
(#21842)
|
||||||
|
* [vrv] Fix CMS signing query extraction (#21809)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.07.16
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
|
||||||
|
(#21281, #21290)
|
||||||
|
* [kaltura] Check source format URL (#21290)
|
||||||
|
* [ctsnews] Fix YouTube embeds extraction (#21678)
|
||||||
|
+ [einthusan] Add support for einthusan.com (#21748, #21775)
|
||||||
|
+ [youtube] Add support for invidious.mastodon.host (#21777)
|
||||||
|
+ [gfycat] Extend URL regular expression (#21779, #21780)
|
||||||
|
* [youtube] Restrict is_live extraction (#21782)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.07.14
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [porn91] Fix extraction (#21312)
|
||||||
|
+ [yandexmusic] Extract track number and disk number (#21421)
|
||||||
|
+ [yandexmusic] Add support for multi disk albums (#21420, #21421)
|
||||||
|
* [lynda] Handle missing subtitles (#20490, #20513)
|
||||||
|
+ [youtube] Add more invidious instances to URL regular expression (#21694)
|
||||||
|
* [twitter] Improve uploader id extraction (#21705)
|
||||||
|
* [spankbang] Fix and improve metadata extraction
|
||||||
|
* [spankbang] Fix extraction (#21763, #21764)
|
||||||
|
+ [dlive] Add support for dlive.tv (#18080)
|
||||||
|
+ [livejournal] Add support for livejournal.com (#21526)
|
||||||
|
* [roosterteeth] Fix free episode extraction (#16094)
|
||||||
|
* [dbtv] Fix extraction
|
||||||
|
* [bellator] Fix extraction
|
||||||
|
- [rudo] Remove extractor (#18430, #18474)
|
||||||
|
* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
|
||||||
|
* [bleacherreport] Fix Bleacher Report CMS extraction
|
||||||
|
* [espn] Fix fivethirtyeight.com extraction
|
||||||
|
* [5tv] Relax video URL regular expression and support https URLs
|
||||||
|
* [youtube] Fix is_live extraction (#21734)
|
||||||
|
* [youtube] Fix authentication (#11270)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.07.12
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [mgtv] Pass Referer HTTP header for format URLs (#21726)
|
||||||
|
+ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
|
||||||
|
* [voxmedia:volume] Improve vox embed extraction (#16846)
|
||||||
|
* [funnyordie] Move extraction to VoxMedia extractor (#16846)
|
||||||
|
* [gameinformer] Fix extraction (#8895, #15363, #17206)
|
||||||
|
* [funk] Fix extraction (#17915)
|
||||||
|
* [packtpub] Relax lesson URL regular expression (#21695)
|
||||||
|
* [packtpub] Fix extraction (#21268)
|
||||||
|
* [philharmoniedeparis] Relax URL regular expression (#21672)
|
||||||
|
* [peertube] Detect embed URLs in generic extraction (#21666)
|
||||||
|
* [mixer:vod] Relax URL regular expression (#21657, #21658)
|
||||||
|
+ [lecturio] Add support for id based URLs (#21630)
|
||||||
|
+ [go] Add site info for disneynow (#21613)
|
||||||
|
* [ted] Restrict info regular expression (#21631)
|
||||||
|
* [twitch:vod] Actualize m3u8 URL (#21538, #21607)
|
||||||
|
* [vzaar] Fix videos with empty title (#21606)
|
||||||
|
* [tvland] Fix extraction (#21384)
|
||||||
|
* [arte] Clean extractor (#15583, #21614)
|
||||||
|
|
||||||
|
|
||||||
version 2019.07.02
|
version 2019.07.02
|
||||||
|
|
||||||
Core
|
Core
|
||||||
@ -177,7 +1001,7 @@ Extractors
|
|||||||
version 2019.04.17
|
version 2019.04.17
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
* [openload] Randomize User-Agent (closes #20688)
|
* [openload] Randomize User-Agent (#20688)
|
||||||
+ [openload] Add support for oladblock domains (#20471)
|
+ [openload] Add support for oladblock domains (#20471)
|
||||||
* [adn] Fix subtitle extraction (#12724)
|
* [adn] Fix subtitle extraction (#12724)
|
||||||
+ [aol] Add support for localized websites
|
+ [aol] Add support for localized websites
|
||||||
@ -742,7 +1566,7 @@ Extractors
|
|||||||
+ [youtube] Extract channel meta fields (#9676, #12939)
|
+ [youtube] Extract channel meta fields (#9676, #12939)
|
||||||
* [porntube] Fix extraction (#17541)
|
* [porntube] Fix extraction (#17541)
|
||||||
* [asiancrush] Fix extraction (#15630)
|
* [asiancrush] Fix extraction (#15630)
|
||||||
+ [twitch:clips] Extend URL regular expression (closes #17559)
|
+ [twitch:clips] Extend URL regular expression (#17559)
|
||||||
+ [vzaar] Add support for HLS
|
+ [vzaar] Add support for HLS
|
||||||
* [tube8] Fix metadata extraction (#17520)
|
* [tube8] Fix metadata extraction (#17520)
|
||||||
* [eporner] Extract JSON-LD (#17519)
|
* [eporner] Extract JSON-LD (#17519)
|
||||||
|
84
README.md
84
README.md
@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
either the path to the binary or its
|
either the path to the binary or its
|
||||||
containing directory.
|
containing directory.
|
||||||
--exec CMD Execute a command on the file after
|
--exec CMD Execute a command on the file after
|
||||||
downloading, similar to find's -exec
|
downloading and post-processing, similar to
|
||||||
syntax. Example: --exec 'adb push {}
|
find's -exec syntax. Example: --exec 'adb
|
||||||
/sdcard/Music/ && rm {}'
|
push {} /sdcard/Music/ && rm {}'
|
||||||
--convert-subs FORMAT Convert the subtitles to other format
|
--convert-subs FORMAT Convert the subtitles to other format
|
||||||
(currently supported: srt|ass|vtt|lrc)
|
(currently supported: srt|ass|vtt|lrc)
|
||||||
|
|
||||||
@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `extractor` (string): Name of the extractor
|
- `extractor` (string): Name of the extractor
|
||||||
- `extractor_key` (string): Key name of the extractor
|
- `extractor_key` (string): Key name of the extractor
|
||||||
- `epoch` (numeric): Unix epoch when creating the file
|
- `epoch` (numeric): Unix epoch when creating the file
|
||||||
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||||
- `playlist` (string): Name or id of the playlist that contains the video
|
- `playlist` (string): Name or id of the playlist that contains the video
|
||||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
- `playlist_id` (string): Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m
|
|||||||
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
|
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
hash -r
|
hash -r
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -835,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
|||||||
|
|
||||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||||
|
|
||||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block, meaning that you can regain access after solving a CAPTCHA. Just open a browser, solve the CAPTCHA the service presents, and then [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs, you should also pass the same IP you used for solving the CAPTCHA with [`--source-address`](#network-options). You may also need to pass your browser's `User-Agent` HTTP header with [`--user-agent`](#workarounds).
|
||||||
|
|
||||||
|
If this is not the case (the service does not present a CAPTCHA to solve), you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character
|
### SyntaxError: Non-ASCII character
|
||||||
|
|
||||||
@ -1030,7 +1032,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
@ -1216,6 +1218,72 @@ Incorrect:
|
|||||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Inline values
|
||||||
|
|
||||||
|
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
TITLE_RE = r'<title>([^<]+)</title>'
|
||||||
|
# ...some lines of code...
|
||||||
|
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Collapse fallbacks
|
||||||
|
|
||||||
|
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Good:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = self._html_search_meta(
|
||||||
|
['og:description', 'description', 'twitter:description'],
|
||||||
|
webpage, 'description', default=None)
|
||||||
|
```
|
||||||
|
|
||||||
|
Unwieldy:
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = (
|
||||||
|
self._og_search_description(webpage, default=None)
|
||||||
|
or self._html_search_meta('description', webpage, default=None)
|
||||||
|
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||||
|
```
|
||||||
|
|
||||||
|
Methods that support a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||||
|
|
||||||
|
### Trailing parentheses
|
||||||
|
|
||||||
|
Always move trailing parentheses after the last argument.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||||
|
list)
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||||
|
list,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
### Use convenience conversion and parsing functions
|
### Use convenience conversion and parsing functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
@ -15,7 +14,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_input,
|
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@ -40,28 +38,20 @@ class GitHubReleaser(object):
|
|||||||
try:
|
try:
|
||||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||||
if info is not None:
|
if info is not None:
|
||||||
self._username = info[0]
|
self._token = info[2]
|
||||||
self._password = info[2]
|
|
||||||
compat_print('Using GitHub credentials found in .netrc...')
|
compat_print('Using GitHub credentials found in .netrc...')
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
compat_print('No GitHub credentials found in .netrc')
|
compat_print('No GitHub credentials found in .netrc')
|
||||||
except (IOError, netrc.NetrcParseError):
|
except (IOError, netrc.NetrcParseError):
|
||||||
compat_print('Unable to parse .netrc')
|
compat_print('Unable to parse .netrc')
|
||||||
self._username = compat_input(
|
self._token = compat_getpass(
|
||||||
'Type your GitHub username or email address and press [Return]: ')
|
'Type your GitHub PAT (personal access token) and press [Return]: ')
|
||||||
self._password = compat_getpass(
|
|
||||||
'Type your GitHub password and press [Return]: ')
|
|
||||||
|
|
||||||
def _call(self, req):
|
def _call(self, req):
|
||||||
if isinstance(req, compat_basestring):
|
if isinstance(req, compat_basestring):
|
||||||
req = sanitized_Request(req)
|
req = sanitized_Request(req)
|
||||||
# Authorizing manually since GitHub does not response with 401 with
|
req.add_header('Authorization', 'token %s' % self._token)
|
||||||
# WWW-Authenticate header set (see
|
|
||||||
# https://developer.github.com/v3/#basic-authentication)
|
|
||||||
b64 = base64.b64encode(
|
|
||||||
('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
|
|
||||||
req.add_header('Authorization', 'Basic %s' % b64)
|
|
||||||
response = self._opener.open(req).read().decode('utf-8')
|
response = self._opener.open(req).read().decode('utf-8')
|
||||||
return json.loads(response)
|
return json.loads(response)
|
||||||
|
|
||||||
|
@ -26,13 +26,13 @@
|
|||||||
- **AcademicEarth:Course**
|
- **AcademicEarth:Course**
|
||||||
- **acast**
|
- **acast**
|
||||||
- **acast:channel**
|
- **acast:channel**
|
||||||
- **AddAnime**
|
|
||||||
- **ADN**: Anime Digital Network
|
- **ADN**: Anime Digital Network
|
||||||
- **AdobeConnect**
|
- **AdobeConnect**
|
||||||
- **AdobeTV**
|
- **adobetv**
|
||||||
- **AdobeTVChannel**
|
- **adobetv:channel**
|
||||||
- **AdobeTVShow**
|
- **adobetv:embed**
|
||||||
- **AdobeTVVideo**
|
- **adobetv:show**
|
||||||
|
- **adobetv:video**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||||
- **afreecatv**: afreecatv.com
|
- **afreecatv**: afreecatv.com
|
||||||
@ -58,16 +58,8 @@
|
|||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
- **ARDBetaMediathek**
|
- **ARDBetaMediathek**
|
||||||
- **Arkena**
|
- **Arkena**
|
||||||
- **arte.tv**
|
|
||||||
- **arte.tv:+7**
|
- **arte.tv:+7**
|
||||||
- **arte.tv:cinema**
|
|
||||||
- **arte.tv:concert**
|
|
||||||
- **arte.tv:creative**
|
|
||||||
- **arte.tv:ddc**
|
|
||||||
- **arte.tv:embed**
|
- **arte.tv:embed**
|
||||||
- **arte.tv:future**
|
|
||||||
- **arte.tv:info**
|
|
||||||
- **arte.tv:magazine**
|
|
||||||
- **arte.tv:playlist**
|
- **arte.tv:playlist**
|
||||||
- **AsianCrush**
|
- **AsianCrush**
|
||||||
- **AsianCrushPlaylist**
|
- **AsianCrushPlaylist**
|
||||||
@ -84,8 +76,6 @@
|
|||||||
- **awaan:video**
|
- **awaan:video**
|
||||||
- **AZMedien**: AZ Medien videos
|
- **AZMedien**: AZ Medien videos
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
|
||||||
- **bambuser:channel**
|
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
- **Bandcamp:weekly**
|
- **Bandcamp:weekly**
|
||||||
@ -106,6 +96,9 @@
|
|||||||
- **Bigflix**
|
- **Bigflix**
|
||||||
- **Bild**: Bild.de
|
- **Bild**: Bild.de
|
||||||
- **BiliBili**
|
- **BiliBili**
|
||||||
|
- **BilibiliAudio**
|
||||||
|
- **BilibiliAudioAlbum**
|
||||||
|
- **BiliBiliPlayer**
|
||||||
- **BioBioChileTV**
|
- **BioBioChileTV**
|
||||||
- **BIQLE**
|
- **BIQLE**
|
||||||
- **BitChute**
|
- **BitChute**
|
||||||
@ -183,12 +176,12 @@
|
|||||||
- **CNN**
|
- **CNN**
|
||||||
- **CNNArticle**
|
- **CNNArticle**
|
||||||
- **CNNBlogs**
|
- **CNNBlogs**
|
||||||
- **ComCarCoff**
|
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
- **ComedyCentralFullEpisodes**
|
- **ComedyCentralFullEpisodes**
|
||||||
- **ComedyCentralShortname**
|
- **ComedyCentralShortname**
|
||||||
- **ComedyCentralTV**
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
|
- **CONtv**
|
||||||
- **Corus**
|
- **Corus**
|
||||||
- **Coub**
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
@ -210,8 +203,6 @@
|
|||||||
- **dailymotion**
|
- **dailymotion**
|
||||||
- **dailymotion:playlist**
|
- **dailymotion:playlist**
|
||||||
- **dailymotion:user**
|
- **dailymotion:user**
|
||||||
- **DaisukiMotto**
|
|
||||||
- **DaisukiMottoPlaylist**
|
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **daum.net:clip**
|
- **daum.net:clip**
|
||||||
- **daum.net:playlist**
|
- **daum.net:playlist**
|
||||||
@ -231,11 +222,12 @@
|
|||||||
- **DiscoveryNetworksDe**
|
- **DiscoveryNetworksDe**
|
||||||
- **DiscoveryVR**
|
- **DiscoveryVR**
|
||||||
- **Disney**
|
- **Disney**
|
||||||
|
- **dlive:stream**
|
||||||
|
- **dlive:vod**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuShow**
|
- **DouyuShow**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
- **DPlayIt**
|
|
||||||
- **DRBonanza**
|
- **DRBonanza**
|
||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
@ -288,12 +280,12 @@
|
|||||||
- **FiveThirtyEight**
|
- **FiveThirtyEight**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Flipagram**
|
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Formula1**
|
- **Formula1**
|
||||||
- **FOX**
|
- **FOX**
|
||||||
- **FOX9**
|
- **FOX9**
|
||||||
|
- **FOX9News**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **foxnews**: Fox News and Fox Business Video
|
- **foxnews**: Fox News and Fox Business Video
|
||||||
- **foxnews:article**
|
- **foxnews:article**
|
||||||
@ -313,16 +305,12 @@
|
|||||||
- **FrontendMastersCourse**
|
- **FrontendMastersCourse**
|
||||||
- **FrontendMastersLesson**
|
- **FrontendMastersLesson**
|
||||||
- **Funimation**
|
- **Funimation**
|
||||||
- **FunkChannel**
|
- **Funk**
|
||||||
- **FunkMix**
|
|
||||||
- **FunnyOrDie**
|
|
||||||
- **Fusion**
|
- **Fusion**
|
||||||
- **Fux**
|
- **Fux**
|
||||||
- **FXNetworks**
|
- **FXNetworks**
|
||||||
- **Gaia**
|
- **Gaia**
|
||||||
- **GameInformer**
|
- **GameInformer**
|
||||||
- **GameOne**
|
|
||||||
- **gameone:playlist**
|
|
||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
- **Gaskrank**
|
- **Gaskrank**
|
||||||
@ -337,14 +325,12 @@
|
|||||||
- **Globo**
|
- **Globo**
|
||||||
- **GloboArticle**
|
- **GloboArticle**
|
||||||
- **Go**
|
- **Go**
|
||||||
- **Go90**
|
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
- **GPUTechConf**
|
- **GPUTechConf**
|
||||||
- **Groupon**
|
- **Groupon**
|
||||||
- **Hark**
|
|
||||||
- **hbo**
|
- **hbo**
|
||||||
- **HearThisAt**
|
- **HearThisAt**
|
||||||
- **Heise**
|
- **Heise**
|
||||||
@ -373,7 +359,6 @@
|
|||||||
- **Hungama**
|
- **Hungama**
|
||||||
- **HungamaSong**
|
- **HungamaSong**
|
||||||
- **Hypem**
|
- **Hypem**
|
||||||
- **Iconosquare**
|
|
||||||
- **ign.com**
|
- **ign.com**
|
||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
@ -405,7 +390,6 @@
|
|||||||
- **JeuxVideo**
|
- **JeuxVideo**
|
||||||
- **Joj**
|
- **Joj**
|
||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
|
||||||
- **JWPlatform**
|
- **JWPlatform**
|
||||||
- **Kakao**
|
- **Kakao**
|
||||||
- **Kaltura**
|
- **Kaltura**
|
||||||
@ -413,14 +397,14 @@
|
|||||||
- **Kankan**
|
- **Kankan**
|
||||||
- **Karaoketv**
|
- **Karaoketv**
|
||||||
- **KarriereVideos**
|
- **KarriereVideos**
|
||||||
- **keek**
|
- **Katsomo**
|
||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
- **Ketnet**
|
- **Ketnet**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
- **KickStarter**
|
- **KickStarter**
|
||||||
|
- **KinjaEmbed**
|
||||||
- **KinoPoisk**
|
- **KinoPoisk**
|
||||||
- **KonserthusetPlay**
|
- **KonserthusetPlay**
|
||||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
|
||||||
- **KrasView**: Красвью
|
- **KrasView**: Красвью
|
||||||
- **Ku6**
|
- **Ku6**
|
||||||
- **KUSI**
|
- **KUSI**
|
||||||
@ -437,7 +421,6 @@
|
|||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
- **Learnr**
|
|
||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
- **Lecturio**
|
- **Lecturio**
|
||||||
- **LecturioCourse**
|
- **LecturioCourse**
|
||||||
@ -458,6 +441,7 @@
|
|||||||
- **linkedin:learning:course**
|
- **linkedin:learning:course**
|
||||||
- **LinuxAcademy**
|
- **LinuxAcademy**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
|
- **LiveJournal**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **LiveLeakEmbed**
|
- **LiveLeakEmbed**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
@ -470,11 +454,9 @@
|
|||||||
- **lynda**: lynda.com videos
|
- **lynda**: lynda.com videos
|
||||||
- **lynda:course**: lynda.com online courses
|
- **lynda:course**: lynda.com online courses
|
||||||
- **m6**
|
- **m6**
|
||||||
- **macgamestore**: MacGameStore trailers
|
|
||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **mailru:music**: Музыка@Mail.Ru
|
- **mailru:music**: Музыка@Mail.Ru
|
||||||
- **mailru:music:search**: Музыка@Mail.Ru
|
- **mailru:music:search**: Музыка@Mail.Ru
|
||||||
- **MakerTV**
|
|
||||||
- **MallTV**
|
- **MallTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
@ -501,14 +483,12 @@
|
|||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
- **MGTV**: 芒果TV
|
- **MGTV**: 芒果TV
|
||||||
- **MiaoPai**
|
- **MiaoPai**
|
||||||
- **Minhateca**
|
|
||||||
- **MinistryGrid**
|
- **MinistryGrid**
|
||||||
- **Minoto**
|
- **Minoto**
|
||||||
- **miomio.tv**
|
- **miomio.tv**
|
||||||
- **MiTele**: mitele.es
|
- **MiTele**: mitele.es
|
||||||
- **mixcloud**
|
- **mixcloud**
|
||||||
- **mixcloud:playlist**
|
- **mixcloud:playlist**
|
||||||
- **mixcloud:stream**
|
|
||||||
- **mixcloud:user**
|
- **mixcloud:user**
|
||||||
- **Mixer:live**
|
- **Mixer:live**
|
||||||
- **Mixer:vod**
|
- **Mixer:vod**
|
||||||
@ -517,6 +497,7 @@
|
|||||||
- **MNetTV**
|
- **MNetTV**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
- **Mofosex**
|
- **Mofosex**
|
||||||
|
- **MofosexEmbed**
|
||||||
- **Mojvideo**
|
- **Mojvideo**
|
||||||
- **Morningstar**: morningstar.com
|
- **Morningstar**: morningstar.com
|
||||||
- **Motherless**
|
- **Motherless**
|
||||||
@ -530,11 +511,10 @@
|
|||||||
- **mtg**: MTG services
|
- **mtg**: MTG services
|
||||||
- **mtv**
|
- **mtv**
|
||||||
- **mtv.de**
|
- **mtv.de**
|
||||||
- **mtv81**
|
|
||||||
- **mtv:video**
|
- **mtv:video**
|
||||||
|
- **mtvjapan**
|
||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
|
||||||
- **mva**: Microsoft Virtual Academy videos
|
- **mva**: Microsoft Virtual Academy videos
|
||||||
- **mva:course**: Microsoft Virtual Academy courses
|
- **mva:course**: Microsoft Virtual Academy courses
|
||||||
- **Mwave**
|
- **Mwave**
|
||||||
@ -639,18 +619,26 @@
|
|||||||
- **OnionStudios**
|
- **OnionStudios**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
- **Openload**
|
|
||||||
- **OraTV**
|
- **OraTV**
|
||||||
|
- **orf:burgenland**: Radio Burgenland
|
||||||
- **orf:fm4**: radio FM4
|
- **orf:fm4**: radio FM4
|
||||||
- **orf:fm4:story**: fm4.orf.at stories
|
- **orf:fm4:story**: fm4.orf.at stories
|
||||||
- **orf:iptv**: iptv.ORF.at
|
- **orf:iptv**: iptv.ORF.at
|
||||||
|
- **orf:kaernten**: Radio Kärnten
|
||||||
|
- **orf:noe**: Radio Niederösterreich
|
||||||
|
- **orf:oberoesterreich**: Radio Oberösterreich
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
|
- **orf:oe3**: Radio Österreich 3
|
||||||
|
- **orf:salzburg**: Radio Salzburg
|
||||||
|
- **orf:steiermark**: Radio Steiermark
|
||||||
|
- **orf:tirol**: Radio Tirol
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
|
- **orf:vorarlberg**: Radio Vorarlberg
|
||||||
|
- **orf:wien**: Radio Wien
|
||||||
- **OsnatelTV**
|
- **OsnatelTV**
|
||||||
- **OutsideTV**
|
- **OutsideTV**
|
||||||
- **PacktPub**
|
- **PacktPub**
|
||||||
- **PacktPubCourse**
|
- **PacktPubCourse**
|
||||||
- **PandaTV**: 熊猫TV
|
|
||||||
- **pandora.tv**: 판도라TV
|
- **pandora.tv**: 판도라TV
|
||||||
- **ParamountNetwork**
|
- **ParamountNetwork**
|
||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
@ -686,6 +674,7 @@
|
|||||||
- **Pokemon**
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PolskieRadioCategory**
|
- **PolskieRadioCategory**
|
||||||
|
- **Popcorntimes**
|
||||||
- **PopcornTV**
|
- **PopcornTV**
|
||||||
- **PornCom**
|
- **PornCom**
|
||||||
- **PornerBros**
|
- **PornerBros**
|
||||||
@ -699,7 +688,6 @@
|
|||||||
- **PornoXO**
|
- **PornoXO**
|
||||||
- **PornTube**
|
- **PornTube**
|
||||||
- **PressTV**
|
- **PressTV**
|
||||||
- **PromptFile**
|
|
||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
- **puhutv**
|
- **puhutv**
|
||||||
- **puhutv:serie**
|
- **puhutv:serie**
|
||||||
@ -729,6 +717,8 @@
|
|||||||
- **RayWenderlichCourse**
|
- **RayWenderlichCourse**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
- **RDS**: RDS.ca
|
- **RDS**: RDS.ca
|
||||||
|
- **RedBull**
|
||||||
|
- **RedBullEmbed**
|
||||||
- **RedBullTV**
|
- **RedBullTV**
|
||||||
- **RedBullTVRrnContent**
|
- **RedBullTVRrnContent**
|
||||||
- **Reddit**
|
- **Reddit**
|
||||||
@ -740,8 +730,6 @@
|
|||||||
- **Restudy**
|
- **Restudy**
|
||||||
- **Reuters**
|
- **Reuters**
|
||||||
- **ReverbNation**
|
- **ReverbNation**
|
||||||
- **revision**
|
|
||||||
- **revision3:embed**
|
|
||||||
- **RICE**
|
- **RICE**
|
||||||
- **RMCDecouverte**
|
- **RMCDecouverte**
|
||||||
- **RockstarGames**
|
- **RockstarGames**
|
||||||
@ -764,7 +752,6 @@
|
|||||||
- **rtve.es:television**
|
- **rtve.es:television**
|
||||||
- **RTVNH**
|
- **RTVNH**
|
||||||
- **RTVS**
|
- **RTVS**
|
||||||
- **Rudo**
|
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
- **rutube:channel**: Rutube channels
|
- **rutube:channel**: Rutube channels
|
||||||
@ -787,11 +774,13 @@
|
|||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
- **ScreencastOMatic**
|
- **ScreencastOMatic**
|
||||||
|
- **ScrippsNetworks**
|
||||||
- **scrippsnetworks:watch**
|
- **scrippsnetworks:watch**
|
||||||
|
- **SCTE**
|
||||||
|
- **SCTECourse**
|
||||||
- **Seeker**
|
- **Seeker**
|
||||||
- **SenateISVP**
|
- **SenateISVP**
|
||||||
- **SendtoNews**
|
- **SendtoNews**
|
||||||
- **ServingSys**
|
|
||||||
- **Servus**
|
- **Servus**
|
||||||
- **Sexu**
|
- **Sexu**
|
||||||
- **SeznamZpravy**
|
- **SeznamZpravy**
|
||||||
@ -822,6 +811,7 @@
|
|||||||
- **soundcloud:set**
|
- **soundcloud:set**
|
||||||
- **soundcloud:trackstation**
|
- **soundcloud:trackstation**
|
||||||
- **soundcloud:user**
|
- **soundcloud:user**
|
||||||
|
- **SoundcloudEmbed**
|
||||||
- **soundgasm**
|
- **soundgasm**
|
||||||
- **soundgasm:profile**
|
- **soundgasm:profile**
|
||||||
- **southpark.cc.com**
|
- **southpark.cc.com**
|
||||||
@ -848,7 +838,6 @@
|
|||||||
- **Steam**
|
- **Steam**
|
||||||
- **Stitcher**
|
- **Stitcher**
|
||||||
- **Streamable**
|
- **Streamable**
|
||||||
- **Streamango**
|
|
||||||
- **streamcloud.eu**
|
- **streamcloud.eu**
|
||||||
- **StreamCZ**
|
- **StreamCZ**
|
||||||
- **StreetVoice**
|
- **StreetVoice**
|
||||||
@ -890,13 +879,14 @@
|
|||||||
- **TeleQuebec**
|
- **TeleQuebec**
|
||||||
- **TeleQuebecEmission**
|
- **TeleQuebecEmission**
|
||||||
- **TeleQuebecLive**
|
- **TeleQuebecLive**
|
||||||
|
- **TeleQuebecSquat**
|
||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
- **Telewebion**
|
- **Telewebion**
|
||||||
- **TennisTV**
|
- **TennisTV**
|
||||||
|
- **TenPlay**
|
||||||
- **TF1**
|
- **TF1**
|
||||||
- **TFO**
|
- **TFO**
|
||||||
- **TheIntercept**
|
- **TheIntercept**
|
||||||
- **theoperaplatform**
|
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
- **ThePlatformFeed**
|
- **ThePlatformFeed**
|
||||||
- **TheScene**
|
- **TheScene**
|
||||||
@ -932,11 +922,12 @@
|
|||||||
- **tunein:topic**
|
- **tunein:topic**
|
||||||
- **TunePk**
|
- **TunePk**
|
||||||
- **Turbo**
|
- **Turbo**
|
||||||
- **Tutv**
|
|
||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
- **TV2**
|
- **TV2**
|
||||||
- **tv2.hu**
|
- **tv2.hu**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
|
- **TV2DK**
|
||||||
|
- **TV2DKBornholmPlay**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TV5MondePlus**: TV5MONDE+
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
- **TVA**
|
- **TVA**
|
||||||
@ -961,22 +952,21 @@
|
|||||||
- **TVPlayHome**
|
- **TVPlayHome**
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **TwitCasting**
|
- **TwitCasting**
|
||||||
- **twitch:chapter**
|
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
- **twitch:profile**
|
|
||||||
- **twitch:stream**
|
- **twitch:stream**
|
||||||
- **twitch:video**
|
|
||||||
- **twitch:videos:all**
|
|
||||||
- **twitch:videos:highlights**
|
|
||||||
- **twitch:videos:past-broadcasts**
|
|
||||||
- **twitch:videos:uploads**
|
|
||||||
- **twitch:vod**
|
- **twitch:vod**
|
||||||
|
- **TwitchCollection**
|
||||||
|
- **TwitchVideos**
|
||||||
|
- **TwitchVideosClips**
|
||||||
|
- **TwitchVideosCollections**
|
||||||
- **twitter**
|
- **twitter**
|
||||||
- **twitter:amplify**
|
- **twitter:amplify**
|
||||||
|
- **twitter:broadcast**
|
||||||
- **twitter:card**
|
- **twitter:card**
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
- **UDNEmbed**: 聯合影音
|
- **UDNEmbed**: 聯合影音
|
||||||
|
- **UFCArabia**
|
||||||
- **UFCTV**
|
- **UFCTV**
|
||||||
- **UKTVPlay**
|
- **UKTVPlay**
|
||||||
- **umg:de**: Universal Music Deutschland
|
- **umg:de**: Universal Music Deutschland
|
||||||
@ -997,8 +987,6 @@
|
|||||||
- **Vbox7**
|
- **Vbox7**
|
||||||
- **VeeHD**
|
- **VeeHD**
|
||||||
- **Veoh**
|
- **Veoh**
|
||||||
- **verystream**
|
|
||||||
- **Vessel**
|
|
||||||
- **Vesti**: Вести.Ru
|
- **Vesti**: Вести.Ru
|
||||||
- **Vevo**
|
- **Vevo**
|
||||||
- **VevoPlaylist**
|
- **VevoPlaylist**
|
||||||
@ -1013,13 +1001,11 @@
|
|||||||
- **Viddler**
|
- **Viddler**
|
||||||
- **Videa**
|
- **Videa**
|
||||||
- **video.google:search**: Google Video search
|
- **video.google:search**: Google Video search
|
||||||
- **video.mit.edu**
|
|
||||||
- **VideoDetective**
|
- **VideoDetective**
|
||||||
- **videofy.me**
|
- **videofy.me**
|
||||||
- **videomore**
|
- **videomore**
|
||||||
- **videomore:season**
|
- **videomore:season**
|
||||||
- **videomore:video**
|
- **videomore:video**
|
||||||
- **VideoPremium**
|
|
||||||
- **VideoPress**
|
- **VideoPress**
|
||||||
- **Vidio**
|
- **Vidio**
|
||||||
- **VidLii**
|
- **VidLii**
|
||||||
@ -1029,9 +1015,8 @@
|
|||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**: vier.be and vijf.be
|
- **vier**: vier.be and vijf.be
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
- **ViewLift**
|
- **viewlift**
|
||||||
- **ViewLiftEmbed**
|
- **viewlift:embed**
|
||||||
- **Viewster**
|
|
||||||
- **Viidea**
|
- **Viidea**
|
||||||
- **viki**
|
- **viki**
|
||||||
- **viki:channel**
|
- **viki:channel**
|
||||||
@ -1097,7 +1082,6 @@
|
|||||||
- **Weibo**
|
- **Weibo**
|
||||||
- **WeiboMobile**
|
- **WeiboMobile**
|
||||||
- **WeiqiTV**: WQTV
|
- **WeiqiTV**: WQTV
|
||||||
- **Wimp**
|
|
||||||
- **Wistia**
|
- **Wistia**
|
||||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **WorldStarHipHop**
|
- **WorldStarHipHop**
|
||||||
@ -1106,9 +1090,10 @@
|
|||||||
- **WWE**
|
- **WWE**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
|
- **XHamsterUser**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
- **xiami:artist**: 虾米音乐 - 歌手
|
- **xiami:artist**: 虾米音乐 - 歌手
|
||||||
- **xiami:collection**: 虾米音乐 - 精选集
|
- **xiami:collection**: 虾米音乐 - 精选集
|
||||||
@ -1126,6 +1111,7 @@
|
|||||||
- **Yahoo**: Yahoo screen and movies
|
- **Yahoo**: Yahoo screen and movies
|
||||||
- **yahoo:gyao**
|
- **yahoo:gyao**
|
||||||
- **yahoo:gyao:player**
|
- **yahoo:gyao:player**
|
||||||
|
- **yahoo:japannews**: Yahoo! Japan News
|
||||||
- **YandexDisk**
|
- **YandexDisk**
|
||||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||||
|
@ -816,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'webpage_url': 'http://example.com',
|
'webpage_url': 'http://example.com',
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_ids(params):
|
def get_downloaded_info_dicts(params):
|
||||||
ydl = YDL(params)
|
ydl = YDL(params)
|
||||||
# make a copy because the dictionary can be modified
|
# make a deep copy because the dictionary and nested entries
|
||||||
ydl.process_ie_result(playlist.copy())
|
# can be modified
|
||||||
return [int(v['id']) for v in ydl.downloaded_info_dicts]
|
ydl.process_ie_result(copy.deepcopy(playlist))
|
||||||
|
return ydl.downloaded_info_dicts
|
||||||
|
|
||||||
|
def get_ids(params):
|
||||||
|
return [int(v['id']) for v in get_downloaded_info_dicts(params)]
|
||||||
|
|
||||||
result = get_ids({})
|
result = get_ids({})
|
||||||
self.assertEqual(result, [1, 2, 3, 4])
|
self.assertEqual(result, [1, 2, 3, 4])
|
||||||
@ -852,6 +856,22 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
||||||
self.assertEqual(result, [2, 3, 4])
|
self.assertEqual(result, [2, 3, 4])
|
||||||
|
|
||||||
|
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
|
||||||
|
# @{
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 2)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 3)
|
||||||
|
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 2)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 3)
|
||||||
|
self.assertEqual(result[2]['playlist_index'], 4)
|
||||||
|
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '4,2'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 4)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 2)
|
||||||
|
# @}
|
||||||
|
|
||||||
def test_urlopen_no_file_protocol(self):
|
def test_urlopen_no_file_protocol(self):
|
||||||
# see https://github.com/ytdl-org/youtube-dl/issues/8227
|
# see https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
|
@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
|
def test_malformed_cookies(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
# Cookies should be empty since all malformed cookie file entries
|
||||||
|
# will be ignored
|
||||||
|
self.assertFalse(cookiejar._cookies)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
|
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
|
||||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
|
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
|
||||||
|
|
||||||
def test_yahoo_https(self):
|
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/2701
|
|
||||||
self.assertMatch(
|
|
||||||
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
|
||||||
['Yahoo'])
|
|
||||||
|
|
||||||
def test_no_duplicated_ie_names(self):
|
def test_no_duplicated_ie_names(self):
|
||||||
name_accu = collections.defaultdict(list)
|
name_accu = collections.defaultdict(list)
|
||||||
for ie in self.ies:
|
for ie in self.ies:
|
||||||
|
@ -26,7 +26,6 @@ from youtube_dl.extractor import (
|
|||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
RTVEALaCartaIE,
|
RTVEALaCartaIE,
|
||||||
FunnyOrDieIE,
|
|
||||||
DemocracynowIE,
|
DemocracynowIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||||
|
|
||||||
|
|
||||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
|
||||||
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
|
||||||
IE = FunnyOrDieIE
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
|
||||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
|
||||||
|
|
||||||
|
|
||||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||||
IE = DemocracynowIE
|
IE = DemocracynowIE
|
||||||
|
@ -19,6 +19,7 @@ from youtube_dl.utils import (
|
|||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
|
caesar,
|
||||||
clean_html,
|
clean_html,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
@ -69,11 +70,13 @@ from youtube_dl.utils import (
|
|||||||
remove_start,
|
remove_start,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
rot47,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
subtitles_filename,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -261,6 +264,11 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||||
|
|
||||||
|
def test_subtitles_filename(self):
|
||||||
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||||
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||||
|
self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
|
||||||
|
|
||||||
def test_remove_start(self):
|
def test_remove_start(self):
|
||||||
self.assertEqual(remove_start(None, 'A - '), None)
|
self.assertEqual(remove_start(None, 'A - '), None)
|
||||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||||
@ -334,6 +342,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
||||||
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
||||||
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
||||||
|
self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
|
||||||
|
self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
|
||||||
|
|
||||||
def test_unified_timestamps(self):
|
def test_unified_timestamps(self):
|
||||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||||
@ -489,6 +499,12 @@ class TestUtil(unittest.TestCase):
|
|||||||
def test_str_to_int(self):
|
def test_str_to_int(self):
|
||||||
self.assertEqual(str_to_int('123,456'), 123456)
|
self.assertEqual(str_to_int('123,456'), 123456)
|
||||||
self.assertEqual(str_to_int('123.456'), 123456)
|
self.assertEqual(str_to_int('123.456'), 123456)
|
||||||
|
self.assertEqual(str_to_int(523), 523)
|
||||||
|
# Python 3 has no long
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
eval('self.assertEqual(str_to_int(123456L), 123456)')
|
||||||
|
self.assertEqual(str_to_int('noninteger'), None)
|
||||||
|
self.assertEqual(str_to_int([]), None)
|
||||||
|
|
||||||
def test_url_basename(self):
|
def test_url_basename(self):
|
||||||
self.assertEqual(url_basename('http://foo.de/'), '')
|
self.assertEqual(url_basename('http://foo.de/'), '')
|
||||||
@ -787,6 +803,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||||
|
self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
|
||||||
|
self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
|
||||||
|
|
||||||
def test_month_by_name(self):
|
def test_month_by_name(self):
|
||||||
self.assertEqual(month_by_name(None), None)
|
self.assertEqual(month_by_name(None), None)
|
||||||
@ -1361,6 +1379,20 @@ Line 1
|
|||||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||||
|
|
||||||
|
def test_caesar(self):
|
||||||
|
self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
|
||||||
|
self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
|
||||||
|
self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
|
||||||
|
self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
|
||||||
|
self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
|
||||||
|
self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
|
||||||
|
self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
|
||||||
|
self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
|
||||||
|
|
||||||
|
def test_rot47(self):
|
||||||
|
self.assertEqual(rot47('youtube-dl'), r'J@FEF36\5=')
|
||||||
|
self.assertEqual(rot47('YOUTUBE-DL'), r'*~&%&qt\s{')
|
||||||
|
|
||||||
def test_urshift(self):
|
def test_urshift(self):
|
||||||
self.assertEqual(urshift(3, 1), 1)
|
self.assertEqual(urshift(3, 1), 1)
|
||||||
self.assertEqual(urshift(-3, 1), 2147483646)
|
self.assertEqual(urshift(-3, 1), 2147483646)
|
||||||
|
@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase):
|
|||||||
for description, duration, expected_chapters in self._TEST_CASES:
|
for description, duration, expected_chapters in self._TEST_CASES:
|
||||||
ie = YoutubeIE()
|
ie = YoutubeIE()
|
||||||
expect_value(
|
expect_value(
|
||||||
self, ie._extract_chapters(description, duration),
|
self, ie._extract_chapters_from_description(description, duration),
|
||||||
expected_chapters, None)
|
expected_chapters, None)
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,6 +74,28 @@ _TESTS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestPlayerInfo(unittest.TestCase):
|
||||||
|
def test_youtube_extract_player_info(self):
|
||||||
|
PLAYER_URLS = (
|
||||||
|
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||||
|
# obsolete
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
|
||||||
|
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
|
||||||
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||||
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||||
|
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
|
||||||
|
('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
|
||||||
|
)
|
||||||
|
for player_url, expected_player_id in PLAYER_URLS:
|
||||||
|
expected_player_type = player_url.split('.')[-1]
|
||||||
|
player_type, player_id = YoutubeIE._extract_player_info(player_url)
|
||||||
|
self.assertEqual(player_type, expected_player_type)
|
||||||
|
self.assertEqual(player_id, expected_player_id)
|
||||||
|
|
||||||
|
|
||||||
class TestSignature(unittest.TestCase):
|
class TestSignature(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
# Cookie file entry with invalid number of fields - 6 instead of 7
|
||||||
|
www.foobar.foobar FALSE / FALSE 0 COOKIE
|
||||||
|
|
||||||
|
# Cookie file entry with invalid expires at
|
||||||
|
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
|
@ -92,6 +92,7 @@ from .utils import (
|
|||||||
YoutubeDLCookieJar,
|
YoutubeDLCookieJar,
|
||||||
YoutubeDLCookieProcessor,
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
|
YoutubeDLRedirectHandler,
|
||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||||
@ -852,8 +853,9 @@ class YoutubeDL(object):
|
|||||||
extract_flat = self.params.get('extract_flat', False)
|
extract_flat = self.params.get('extract_flat', False)
|
||||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
|
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
|
||||||
or extract_flat is True):
|
or extract_flat is True):
|
||||||
if self.params.get('forcejson', False):
|
self.__forced_printings(
|
||||||
self.to_stdout(json.dumps(ie_result))
|
ie_result, self.prepare_filename(ie_result),
|
||||||
|
incomplete=True)
|
||||||
return ie_result
|
return ie_result
|
||||||
|
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
@ -989,7 +991,7 @@ class YoutubeDL(object):
|
|||||||
'playlist_title': ie_result.get('title'),
|
'playlist_title': ie_result.get('title'),
|
||||||
'playlist_uploader': ie_result.get('uploader'),
|
'playlist_uploader': ie_result.get('uploader'),
|
||||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
@ -1693,6 +1695,36 @@ class YoutubeDL(object):
|
|||||||
subs[lang] = f
|
subs[lang] = f
|
||||||
return subs
|
return subs
|
||||||
|
|
||||||
|
def __forced_printings(self, info_dict, filename, incomplete):
|
||||||
|
def print_mandatory(field):
|
||||||
|
if (self.params.get('force%s' % field, False)
|
||||||
|
and (not incomplete or info_dict.get(field) is not None)):
|
||||||
|
self.to_stdout(info_dict[field])
|
||||||
|
|
||||||
|
def print_optional(field):
|
||||||
|
if (self.params.get('force%s' % field, False)
|
||||||
|
and info_dict.get(field) is not None):
|
||||||
|
self.to_stdout(info_dict[field])
|
||||||
|
|
||||||
|
print_mandatory('title')
|
||||||
|
print_mandatory('id')
|
||||||
|
if self.params.get('forceurl', False) and not incomplete:
|
||||||
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
for f in info_dict['requested_formats']:
|
||||||
|
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||||
|
else:
|
||||||
|
# For RTMP URLs, also include the playpath
|
||||||
|
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||||
|
print_optional('thumbnail')
|
||||||
|
print_optional('description')
|
||||||
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
|
self.to_stdout(filename)
|
||||||
|
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||||
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
|
print_mandatory('format')
|
||||||
|
if self.params.get('forcejson', False):
|
||||||
|
self.to_stdout(json.dumps(info_dict))
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@ -1703,9 +1735,8 @@ class YoutubeDL(object):
|
|||||||
if self._num_downloads >= int(max_downloads):
|
if self._num_downloads >= int(max_downloads):
|
||||||
raise MaxDownloadsReached()
|
raise MaxDownloadsReached()
|
||||||
|
|
||||||
|
# TODO: backward compatibility, to be removed
|
||||||
info_dict['fulltitle'] = info_dict['title']
|
info_dict['fulltitle'] = info_dict['title']
|
||||||
if len(info_dict['title']) > 200:
|
|
||||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
|
||||||
|
|
||||||
if 'format' not in info_dict:
|
if 'format' not in info_dict:
|
||||||
info_dict['format'] = info_dict['ext']
|
info_dict['format'] = info_dict['ext']
|
||||||
@ -1720,29 +1751,7 @@ class YoutubeDL(object):
|
|||||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||||
|
|
||||||
# Forced printings
|
# Forced printings
|
||||||
if self.params.get('forcetitle', False):
|
self.__forced_printings(info_dict, filename, incomplete=False)
|
||||||
self.to_stdout(info_dict['fulltitle'])
|
|
||||||
if self.params.get('forceid', False):
|
|
||||||
self.to_stdout(info_dict['id'])
|
|
||||||
if self.params.get('forceurl', False):
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
|
||||||
for f in info_dict['requested_formats']:
|
|
||||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
|
||||||
else:
|
|
||||||
# For RTMP URLs, also include the playpath
|
|
||||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
|
||||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
|
||||||
self.to_stdout(info_dict['thumbnail'])
|
|
||||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
|
||||||
self.to_stdout(info_dict['description'])
|
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
|
||||||
self.to_stdout(filename)
|
|
||||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
|
||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
|
||||||
if self.params.get('forceformat', False):
|
|
||||||
self.to_stdout(info_dict['format'])
|
|
||||||
if self.params.get('forcejson', False):
|
|
||||||
self.to_stdout(json.dumps(info_dict))
|
|
||||||
|
|
||||||
# Do nothing else if in simulate mode
|
# Do nothing else if in simulate mode
|
||||||
if self.params.get('simulate', False):
|
if self.params.get('simulate', False):
|
||||||
@ -1783,6 +1792,8 @@ class YoutubeDL(object):
|
|||||||
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
|
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||||
self.to_screen('[info] Video annotations are already present')
|
self.to_screen('[info] Video annotations are already present')
|
||||||
|
elif not info_dict.get('annotations'):
|
||||||
|
self.report_warning('There are no annotations to write.')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||||
@ -1804,7 +1815,7 @@ class YoutubeDL(object):
|
|||||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||||
for sub_lang, sub_info in subtitles.items():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub_format = sub_info['ext']
|
sub_format = sub_info['ext']
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||||
else:
|
else:
|
||||||
@ -2333,6 +2344,7 @@ class YoutubeDL(object):
|
|||||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||||
|
redirect_handler = YoutubeDLRedirectHandler()
|
||||||
data_handler = compat_urllib_request_DataHandler()
|
data_handler = compat_urllib_request_DataHandler()
|
||||||
|
|
||||||
# When passing our own FileHandler instance, build_opener won't add the
|
# When passing our own FileHandler instance, build_opener won't add the
|
||||||
@ -2346,7 +2358,7 @@ class YoutubeDL(object):
|
|||||||
file_handler.file_open = file_open
|
file_handler.file_open = file_open
|
||||||
|
|
||||||
opener = compat_urllib_request.build_opener(
|
opener = compat_urllib_request.build_opener(
|
||||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
|
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||||
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
@ -94,7 +94,7 @@ def _real_main(argv=None):
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||||
except IOError:
|
except IOError:
|
||||||
sys.exit('ERROR: batch file could not be read')
|
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
|
||||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||||
_enc = preferredencoding()
|
_enc = preferredencoding()
|
||||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||||
|
@ -57,6 +57,17 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import cookielib as compat_cookiejar
|
import cookielib as compat_cookiejar
|
||||||
|
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
|
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||||
|
def __init__(self, version, name, value, *args, **kwargs):
|
||||||
|
if isinstance(name, compat_str):
|
||||||
|
name = name.encode()
|
||||||
|
if isinstance(value, compat_str):
|
||||||
|
value = value.encode()
|
||||||
|
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookies as compat_cookies
|
import http.cookies as compat_cookies
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
@ -2754,6 +2765,17 @@ else:
|
|||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
|
||||||
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
|
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||||
|
def compat_realpath(path):
|
||||||
|
while os.path.islink(path):
|
||||||
|
path = os.path.abspath(os.readlink(path))
|
||||||
|
return path
|
||||||
|
else:
|
||||||
|
compat_realpath = os.path.realpath
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
def compat_print(s):
|
def compat_print(s):
|
||||||
from .utils import preferredencoding
|
from .utils import preferredencoding
|
||||||
@ -2976,6 +2998,7 @@ __all__ = [
|
|||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
|
'compat_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_Element',
|
'compat_etree_Element',
|
||||||
@ -2998,6 +3021,7 @@ __all__ = [
|
|||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
|
'compat_realpath',
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
|
@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
except compat_urllib_error.HTTPError as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||||
# whole download to fail. However if the same fragment is immediately
|
# whole download to fail. However if the same fragment is immediately
|
||||||
# retried with the same request data this usually succeeds (1-2 attemps
|
# retried with the same request data this usually succeeds (1-2 attempts
|
||||||
# is usually enough) thus allowing to download the whole file successfully.
|
# is usually enough) thus allowing to download the whole file successfully.
|
||||||
# To be future-proof we will retry all fragments that fail with any
|
# To be future-proof we will retry all fragments that fail with any
|
||||||
# HTTP error.
|
# HTTP error.
|
||||||
|
@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
|
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
|
|||||||
})
|
})
|
||||||
|
|
||||||
def _start_frag_download(self, ctx):
|
def _start_frag_download(self, ctx):
|
||||||
|
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||||
total_frags = ctx['total_frags']
|
total_frags = ctx['total_frags']
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
state = {
|
state = {
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
|
'downloaded_bytes': resume_len,
|
||||||
'fragment_index': ctx['fragment_index'],
|
'fragment_index': ctx['fragment_index'],
|
||||||
'fragment_count': total_frags,
|
'fragment_count': total_frags,
|
||||||
'filename': ctx['filename'],
|
'filename': ctx['filename'],
|
||||||
@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
|
|||||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||||
if not ctx['live']:
|
if not ctx['live']:
|
||||||
state['eta'] = self.calc_eta(
|
state['eta'] = self.calc_eta(
|
||||||
start, time_now, estimated_size,
|
start, time_now, estimated_size - resume_len,
|
||||||
state['downloaded_bytes'])
|
state['downloaded_bytes'] - resume_len)
|
||||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||||
ctx['speed'] = state['speed']
|
ctx['speed'] = state['speed']
|
||||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||||
|
@ -64,7 +64,7 @@ class HlsFD(FragmentFD):
|
|||||||
s = urlh.read().decode('utf-8', 'ignore')
|
s = urlh.read().decode('utf-8', 'ignore')
|
||||||
|
|
||||||
if not self.can_download(s, info_dict):
|
if not self.can_download(s, info_dict):
|
||||||
if info_dict.get('extra_param_to_segment_url'):
|
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
|
||||||
self.report_error('pycrypto not found. Please install it.')
|
self.report_error('pycrypto not found. Please install it.')
|
||||||
return False
|
return False
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
@ -141,7 +141,7 @@ class HlsFD(FragmentFD):
|
|||||||
count = 0
|
count = 0
|
||||||
headers = info_dict.get('http_headers', {})
|
headers = info_dict.get('http_headers', {})
|
||||||
if byte_range:
|
if byte_range:
|
||||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success, frag_content = self._download_fragment(
|
success, frag_content = self._download_fragment(
|
||||||
@ -169,7 +169,7 @@ class HlsFD(FragmentFD):
|
|||||||
if decrypt_info['METHOD'] == 'AES-128':
|
if decrypt_info['METHOD'] == 'AES-128':
|
||||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||||
self._prepare_url(info_dict, decrypt_info['URI'])).read()
|
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||||
frag_content = AES.new(
|
frag_content = AES.new(
|
||||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||||
self._append_fragment(ctx, frag_content)
|
self._append_fragment(ctx, frag_content)
|
||||||
|
@ -106,7 +106,12 @@ class HttpFD(FileDownloader):
|
|||||||
set_range(request, range_start, range_end)
|
set_range(request, range_start, range_end)
|
||||||
# Establish connection
|
# Establish connection
|
||||||
try:
|
try:
|
||||||
ctx.data = self.ydl.urlopen(request)
|
try:
|
||||||
|
ctx.data = self.ydl.urlopen(request)
|
||||||
|
except (compat_urllib_error.URLError, ) as err:
|
||||||
|
if isinstance(err.reason, socket.timeout):
|
||||||
|
raise RetryDownload(err)
|
||||||
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
@ -218,24 +223,27 @@ class HttpFD(FileDownloader):
|
|||||||
|
|
||||||
def retry(e):
|
def retry(e):
|
||||||
to_stdout = ctx.tmpfilename == '-'
|
to_stdout = ctx.tmpfilename == '-'
|
||||||
if not to_stdout:
|
if ctx.stream is not None:
|
||||||
ctx.stream.close()
|
if not to_stdout:
|
||||||
ctx.stream = None
|
ctx.stream.close()
|
||||||
|
ctx.stream = None
|
||||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
raise RetryDownload(e)
|
raise RetryDownload(e)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# Download and write
|
# Download and write
|
||||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
|
||||||
# socket.timeout is a subclass of socket.error but may not have
|
# socket.timeout is a subclass of socket.error but may not have
|
||||||
# errno set
|
# errno set
|
||||||
except socket.timeout as e:
|
except socket.timeout as e:
|
||||||
retry(e)
|
retry(e)
|
||||||
except socket.error as e:
|
except socket.error as e:
|
||||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
# SSLError on python 2 (inherits socket.error) may have
|
||||||
raise
|
# no errno set but this error message
|
||||||
retry(e)
|
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
|
||||||
|
retry(e)
|
||||||
|
raise
|
||||||
|
|
||||||
byte_counter += len(data_block)
|
byte_counter += len(data_block)
|
||||||
|
|
||||||
@ -299,7 +307,7 @@ class HttpFD(FileDownloader):
|
|||||||
'elapsed': now - ctx.start_time,
|
'elapsed': now - ctx.start_time,
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_test and byte_counter == data_len:
|
if data_len is not None and byte_counter == data_len:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||||
|
@ -146,7 +146,7 @@ def write_piff_header(stream, params):
|
|||||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||||
avcc_payload = u8.pack(1) # configuration version
|
avcc_payload = u8.pack(1) # configuration version
|
||||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
|
||||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||||
avcc_payload += u16.pack(len(sps))
|
avcc_payload += u16.pack(len(sps))
|
||||||
avcc_payload += sps
|
avcc_payload += sps
|
||||||
|
@ -110,17 +110,17 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
|
|
||||||
# ABC iview programs are normally available for 14 days only.
|
# ABC iview programs are normally available for 14 days only.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZX9371A050S00',
|
'id': 'LE1927H001S00',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Gaston's Birthday",
|
'title': "Series 11 Ep 1",
|
||||||
'series': "Ben And Holly's Little Kingdom",
|
'series': "Gruen",
|
||||||
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||||
'upload_date': '20180604',
|
'upload_date': '20190925',
|
||||||
'uploader_id': 'abc4kids',
|
'uploader_id': 'abc1',
|
||||||
'timestamp': 1528140219,
|
'timestamp': 1569445289,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -148,7 +148,7 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
'hdnea': token,
|
'hdnea': token,
|
||||||
})
|
})
|
||||||
|
|
||||||
for sd in ('sd', 'sd-low'):
|
for sd in ('720', 'sd', 'sd-low'):
|
||||||
sd_url = try_get(
|
sd_url = try_get(
|
||||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||||
if not sd_url:
|
if not sd_url:
|
||||||
|
@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE):
|
|||||||
IE_NAME = 'abcnews:video'
|
IE_NAME = 'abcnews:video'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
abcnews\.go\.com/
|
|
||||||
(?:
|
(?:
|
||||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
abcnews\.go\.com/
|
||||||
video/embed\?.*?\bid=
|
(?:
|
||||||
|
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
|
video/embed\?.*?\bid=
|
||||||
|
)|
|
||||||
|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||||
)
|
)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
|
@ -4,29 +4,30 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ABCOTVSIE(InfoExtractor):
|
class ABCOTVSIE(InfoExtractor):
|
||||||
IE_NAME = 'abcotvs'
|
IE_NAME = 'abcotvs'
|
||||||
IE_DESC = 'ABC Owned Television Stations'
|
IE_DESC = 'ABC Owned Television Stations'
|
||||||
_VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '472581',
|
'id': '472548',
|
||||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
'title': 'East Bay museum celebrates synthesized music',
|
||||||
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1421123075,
|
'timestamp': 1421118520,
|
||||||
'upload_date': '20150113',
|
'upload_date': '20150113',
|
||||||
'uploader': 'Jonathan Bloom',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -37,39 +38,63 @@ class ABCOTVSIE(InfoExtractor):
|
|||||||
'url': 'http://abc7news.com/472581',
|
'url': 'http://abc7news.com/472581',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
_SITE_MAP = {
|
||||||
|
'6abc': 'wpvi',
|
||||||
|
'abc11': 'wtvd',
|
||||||
|
'abc13': 'ktrk',
|
||||||
|
'abc30': 'kfsn',
|
||||||
|
'abc7': 'kabc',
|
||||||
|
'abc7chicago': 'wls',
|
||||||
|
'abc7news': 'kgo',
|
||||||
|
'abc7ny': 'wabc',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id = mobj.group('id')
|
display_id = display_id or video_id
|
||||||
display_id = mobj.group('display_id') or video_id
|
station = self._SITE_MAP[site]
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
data = self._download_json(
|
||||||
|
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||||
|
'id': video_id,
|
||||||
|
'key': 'otv.web.%s.story' % station,
|
||||||
|
'station': station,
|
||||||
|
})['data']
|
||||||
|
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||||
|
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||||
|
title = video.get('title') or video['linkText']
|
||||||
|
|
||||||
m3u8 = self._html_search_meta(
|
formats = []
|
||||||
'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
|
m3u8_url = video.get('m3u8')
|
||||||
|
if m3u8_url:
|
||||||
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
formats = self._extract_m3u8_formats(
|
||||||
|
video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
mp4_url = video.get('mp4')
|
||||||
|
if mp4_url:
|
||||||
|
formats.append({
|
||||||
|
'abr': 128,
|
||||||
|
'format_id': 'https',
|
||||||
|
'height': 360,
|
||||||
|
'url': mp4_url,
|
||||||
|
'width': 640,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(webpage).strip()
|
image = video.get('image') or {}
|
||||||
description = self._og_search_description(webpage).strip()
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
|
||||||
r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
|
|
||||||
webpage, 'upload date', fatal=False))
|
|
||||||
uploader = self._search_regex(
|
|
||||||
r'rel="author">([^<]+)</a>',
|
|
||||||
webpage, 'uploader', default=None)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': dict_get(image, ('source', 'dynamicSource')),
|
||||||
'timestamp': timestamp,
|
'timestamp': int_or_none(video.get('date')),
|
||||||
'uploader': uploader,
|
'duration': int_or_none(video.get('length')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,95 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import (
|
|
||||||
compat_HTTPError,
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
qualities,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AddAnimeIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
|
||||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '24MR3YO5SAS9',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'One Piece 606',
|
|
||||||
'title': 'One Piece 606',
|
|
||||||
},
|
|
||||||
'skip': 'Video is gone',
|
|
||||||
}, {
|
|
||||||
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
try:
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if not isinstance(ee.cause, compat_HTTPError) or \
|
|
||||||
ee.cause.code != 503:
|
|
||||||
raise
|
|
||||||
|
|
||||||
redir_webpage = ee.cause.read().decode('utf-8')
|
|
||||||
action = self._search_regex(
|
|
||||||
r'<form id="challenge-form" action="([^"]+)"',
|
|
||||||
redir_webpage, 'Redirect form')
|
|
||||||
vc = self._search_regex(
|
|
||||||
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
|
|
||||||
redir_webpage, 'redirect vc value')
|
|
||||||
av = re.search(
|
|
||||||
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
|
|
||||||
redir_webpage)
|
|
||||||
if av is None:
|
|
||||||
raise ExtractorError('Cannot find redirect math task')
|
|
||||||
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
|
|
||||||
|
|
||||||
parsed_url = compat_urllib_parse_urlparse(url)
|
|
||||||
av_val = av_res + len(parsed_url.netloc)
|
|
||||||
confirm_url = (
|
|
||||||
parsed_url.scheme + '://' + parsed_url.netloc
|
|
||||||
+ action + '?'
|
|
||||||
+ compat_urllib_parse_urlencode({
|
|
||||||
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
|
|
||||||
self._download_webpage(
|
|
||||||
confirm_url, video_id,
|
|
||||||
note='Confirming after redirect')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
FORMATS = ('normal', 'hq')
|
|
||||||
quality = qualities(FORMATS)
|
|
||||||
formats = []
|
|
||||||
for format_id in FORMATS:
|
|
||||||
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
|
||||||
video_url = self._search_regex(rex, webpage, 'video file URLx',
|
|
||||||
fatal=False)
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'quality': quality(format_id),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
video_title = self._og_search_title(webpage)
|
|
||||||
video_description = self._og_search_description(webpage)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'title': video_title,
|
|
||||||
'description': video_description
|
|
||||||
}
|
|
@ -1,25 +1,119 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
|
||||||
unified_strdate,
|
|
||||||
str_to_int,
|
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
ISO639Utils,
|
ISO639Utils,
|
||||||
determine_ext,
|
OnDemandPagedList,
|
||||||
|
parse_duration,
|
||||||
|
str_or_none,
|
||||||
|
str_to_int,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVBaseIE(InfoExtractor):
|
class AdobeTVBaseIE(InfoExtractor):
|
||||||
_API_BASE_URL = 'http://tv.adobe.com/api/v4/'
|
def _call_api(self, path, video_id, query, note=None):
|
||||||
|
return self._download_json(
|
||||||
|
'http://tv.adobe.com/api/v4/' + path,
|
||||||
|
video_id, note, query=query)['data']
|
||||||
|
|
||||||
|
def _parse_subtitles(self, video_data, url_key):
|
||||||
|
subtitles = {}
|
||||||
|
for translation in video_data.get('translations', []):
|
||||||
|
vtt_path = translation.get(url_key)
|
||||||
|
if not vtt_path:
|
||||||
|
continue
|
||||||
|
lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'ext': 'vtt',
|
||||||
|
'url': vtt_path,
|
||||||
|
})
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _parse_video_data(self, video_data):
|
||||||
|
video_id = compat_str(video_data['id'])
|
||||||
|
title = video_data['title']
|
||||||
|
|
||||||
|
s3_extracted = False
|
||||||
|
formats = []
|
||||||
|
for source in video_data.get('videos', []):
|
||||||
|
source_url = source.get('url')
|
||||||
|
if not source_url:
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'format_id': source.get('quality_level'),
|
||||||
|
'fps': int_or_none(source.get('frame_rate')),
|
||||||
|
'height': int_or_none(source.get('height')),
|
||||||
|
'tbr': int_or_none(source.get('video_data_rate')),
|
||||||
|
'width': int_or_none(source.get('width')),
|
||||||
|
'url': source_url,
|
||||||
|
}
|
||||||
|
original_filename = source.get('original_filename')
|
||||||
|
if original_filename:
|
||||||
|
if not (f.get('height') and f.get('width')):
|
||||||
|
mobj = re.search(r'_(\d+)x(\d+)', original_filename)
|
||||||
|
if mobj:
|
||||||
|
f.update({
|
||||||
|
'height': int(mobj.group(2)),
|
||||||
|
'width': int(mobj.group(1)),
|
||||||
|
})
|
||||||
|
if original_filename.startswith('s3://') and not s3_extracted:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'original',
|
||||||
|
'preference': 1,
|
||||||
|
'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
|
||||||
|
})
|
||||||
|
s3_extracted = True
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnail': video_data.get('thumbnail'),
|
||||||
|
'upload_date': unified_strdate(video_data.get('start_date')),
|
||||||
|
'duration': parse_duration(video_data.get('duration')),
|
||||||
|
'view_count': str_to_int(video_data.get('playcount')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': self._parse_subtitles(video_data, 'vtt'),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AdobeTVEmbedIE(AdobeTVBaseIE):
|
||||||
|
IE_NAME = 'adobetv:embed'
|
||||||
|
_VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://tv.adobe.com/embed/22/4153',
|
||||||
|
'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4153',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Creating Graphics Optimized for BlackBerry',
|
||||||
|
'description': 'md5:eac6e8dced38bdaae51cd94447927459',
|
||||||
|
'thumbnail': r're:https?://.*\.jpg$',
|
||||||
|
'upload_date': '20091109',
|
||||||
|
'duration': 377,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video_data = self._call_api(
|
||||||
|
'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
|
||||||
|
return self._parse_video_data(video_data)
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVIE(AdobeTVBaseIE):
|
class AdobeTVIE(AdobeTVBaseIE):
|
||||||
|
IE_NAME = 'adobetv'
|
||||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE):
|
|||||||
if not language:
|
if not language:
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
video_data = self._download_json(
|
video_data = self._call_api(
|
||||||
self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
|
'episode/get', urlname, {
|
||||||
urlname)['data'][0]
|
'disclosure': 'standard',
|
||||||
|
'language': language,
|
||||||
formats = [{
|
'show_urlname': show_urlname,
|
||||||
'url': source['url'],
|
'urlname': urlname,
|
||||||
'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
|
})[0]
|
||||||
'width': int_or_none(source.get('width')),
|
return self._parse_video_data(video_data)
|
||||||
'height': int_or_none(source.get('height')),
|
|
||||||
'tbr': int_or_none(source.get('video_data_rate')),
|
|
||||||
} for source in video_data['videos']]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': compat_str(video_data['id']),
|
|
||||||
'title': video_data['title'],
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'thumbnail': video_data.get('thumbnail'),
|
|
||||||
'upload_date': unified_strdate(video_data.get('start_date')),
|
|
||||||
'duration': parse_duration(video_data.get('duration')),
|
|
||||||
'view_count': str_to_int(video_data.get('playcount')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||||
def _parse_page_data(self, page_data):
|
_PAGE_SIZE = 25
|
||||||
return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
|
|
||||||
|
|
||||||
def _extract_playlist_entries(self, url, display_id):
|
def _fetch_page(self, display_id, query, page):
|
||||||
page = self._download_json(url, display_id)
|
page += 1
|
||||||
entries = self._parse_page_data(page['data'])
|
query['page'] = page
|
||||||
for page_num in range(2, page['paging']['pages'] + 1):
|
for element_data in self._call_api(
|
||||||
entries.extend(self._parse_page_data(
|
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||||
self._download_json(url + '&page=%d' % page_num, display_id)['data']))
|
yield self._process_data(element_data)
|
||||||
return entries
|
|
||||||
|
def _extract_playlist_entries(self, display_id, query):
|
||||||
|
return OnDemandPagedList(functools.partial(
|
||||||
|
self._fetch_page, display_id, query), self._PAGE_SIZE)
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
|
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
|
||||||
|
IE_NAME = 'adobetv:show'
|
||||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 136,
|
'playlist_mincount': 136,
|
||||||
}
|
}
|
||||||
|
_RESOURCE = 'episode'
|
||||||
def _get_element_url(self, element_data):
|
_process_data = AdobeTVBaseIE._parse_video_data
|
||||||
return element_data['urls'][0]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
language, show_urlname = re.match(self._VALID_URL, url).groups()
|
language, show_urlname = re.match(self._VALID_URL, url).groups()
|
||||||
if not language:
|
if not language:
|
||||||
language = 'en'
|
language = 'en'
|
||||||
query = 'language=%s&show_urlname=%s' % (language, show_urlname)
|
query = {
|
||||||
|
'disclosure': 'standard',
|
||||||
|
'language': language,
|
||||||
|
'show_urlname': show_urlname,
|
||||||
|
}
|
||||||
|
|
||||||
show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
|
show_data = self._call_api(
|
||||||
|
'show/get', show_urlname, query)[0]
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
|
self._extract_playlist_entries(show_urlname, query),
|
||||||
compat_str(show_data['id']),
|
str_or_none(show_data.get('id')),
|
||||||
show_data['show_name'],
|
show_data.get('show_name'),
|
||||||
show_data['show_description'])
|
show_data.get('show_description'))
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
|
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
|
||||||
|
IE_NAME = 'adobetv:channel'
|
||||||
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
|
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 96,
|
'playlist_mincount': 96,
|
||||||
}
|
}
|
||||||
|
_RESOURCE = 'show'
|
||||||
|
|
||||||
def _get_element_url(self, element_data):
|
def _process_data(self, show_data):
|
||||||
return element_data['url']
|
return self.url_result(
|
||||||
|
show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
|
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
|
||||||
if not language:
|
if not language:
|
||||||
language = 'en'
|
language = 'en'
|
||||||
query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
|
query = {
|
||||||
|
'channel_urlname': channel_urlname,
|
||||||
|
'language': language,
|
||||||
|
}
|
||||||
if category_urlname:
|
if category_urlname:
|
||||||
query += '&category_urlname=%s' % category_urlname
|
query['category_urlname'] = category_urlname
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
|
self._extract_playlist_entries(channel_urlname, query),
|
||||||
channel_urlname)
|
channel_urlname)
|
||||||
|
|
||||||
|
|
||||||
class AdobeTVVideoIE(InfoExtractor):
|
class AdobeTVVideoIE(AdobeTVBaseIE):
|
||||||
|
IE_NAME = 'adobetv:video'
|
||||||
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
video_data = self._parse_json(self._search_regex(
|
video_data = self._parse_json(self._search_regex(
|
||||||
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
|
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
|
||||||
|
title = video_data['title']
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
|
sources = video_data.get('sources') or []
|
||||||
'url': source['src'],
|
for source in sources:
|
||||||
'width': int_or_none(source.get('width')),
|
source_src = source.get('src')
|
||||||
'height': int_or_none(source.get('height')),
|
if not source_src:
|
||||||
'tbr': int_or_none(source.get('bitrate')),
|
continue
|
||||||
} for source in video_data['sources']]
|
formats.append({
|
||||||
|
'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
|
||||||
|
'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
|
||||||
|
'height': int_or_none(source.get('height') or None),
|
||||||
|
'tbr': int_or_none(source.get('bitrate') or None),
|
||||||
|
'width': int_or_none(source.get('width') or None),
|
||||||
|
'url': source_src,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
# For both metadata and downloaded files the duration varies among
|
# For both metadata and downloaded files the duration varies among
|
||||||
# formats. I just pick the max one
|
# formats. I just pick the max one
|
||||||
duration = max(filter(None, [
|
duration = max(filter(None, [
|
||||||
float_or_none(source.get('duration'), scale=1000)
|
float_or_none(source.get('duration'), scale=1000)
|
||||||
for source in video_data['sources']]))
|
for source in sources]))
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for translation in video_data.get('translations', []):
|
|
||||||
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
|
||||||
if lang_id not in subtitles:
|
|
||||||
subtitles[lang_id] = []
|
|
||||||
subtitles[lang_id].append({
|
|
||||||
'url': translation['vttPath'],
|
|
||||||
'ext': 'vtt',
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': video_data['title'],
|
'title': title,
|
||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'thumbnail': video_data['video'].get('poster'),
|
'thumbnail': video_data.get('video', {}).get('poster'),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'subtitles': subtitles,
|
'subtitles': self._parse_subtitles(video_data, 'vttPath'),
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
@ -13,22 +14,21 @@ from ..utils import (
|
|||||||
class AmericasTestKitchenIE(InfoExtractor):
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
|
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_5g5zua6e',
|
'id': '5b400b9ee338f922cb06450c',
|
||||||
'title': 'Summer Dinner Party',
|
'title': 'Weeknight Japanese Suppers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
|
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://',
|
||||||
'timestamp': 1497285541,
|
'timestamp': 1523664000,
|
||||||
'upload_date': '20170612',
|
'upload_date': '20180414',
|
||||||
'uploader_id': 'roger.metcalf@americastestkitchen.com',
|
'release_date': '20180414',
|
||||||
'release_date': '20170617',
|
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
'season_number': 17,
|
'season_number': 18,
|
||||||
'episode': 'Summer Dinner Party',
|
'episode': 'Weeknight Japanese Suppers',
|
||||||
'episode_number': 24,
|
'episode_number': 15,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -47,7 +47,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||||
webpage, 'initial context'),
|
webpage, 'initial context'),
|
||||||
video_id)
|
video_id, js_to_json)
|
||||||
|
|
||||||
ep_data = try_get(
|
ep_data = try_get(
|
||||||
video_data,
|
video_data,
|
||||||
@ -55,17 +55,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
lambda x: x['videoDetail']['content']['data']), dict)
|
lambda x: x['videoDetail']['content']['data']), dict)
|
||||||
ep_meta = ep_data.get('full_video', {})
|
ep_meta = ep_data.get('full_video', {})
|
||||||
|
|
||||||
zype_id = ep_meta.get('zype_id')
|
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
||||||
if zype_id:
|
|
||||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
|
||||||
ie_key = 'Zype'
|
|
||||||
else:
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
|
||||||
webpage, 'kaltura partner id')
|
|
||||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
|
||||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
|
||||||
ie_key = 'Kaltura'
|
|
||||||
|
|
||||||
title = ep_data.get('title') or ep_meta.get('title')
|
title = ep_data.get('title') or ep_meta.get('title')
|
||||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||||
@ -79,8 +69,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': embed_url,
|
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
||||||
'ie_key': ie_key,
|
'ie_key': 'Zype',
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -22,7 +23,101 @@ from ..utils import (
|
|||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
|
|
||||||
class ARDMediathekIE(InfoExtractor):
|
class ARDMediathekBaseIE(InfoExtractor):
|
||||||
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
|
||||||
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
|
media_info = self._download_json(
|
||||||
|
media_info_url, video_id, 'Downloading media JSON')
|
||||||
|
return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
|
||||||
|
|
||||||
|
def _parse_media_info(self, media_info, video_id, fsk):
|
||||||
|
formats = self._extract_formats(media_info, video_id)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
if fsk:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is only available after 20:00', expected=True)
|
||||||
|
elif media_info.get('_geoblocked'):
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
'This video is not available due to geoblocking',
|
||||||
|
countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
subtitle_url = media_info.get('_subtitleUrl')
|
||||||
|
if subtitle_url:
|
||||||
|
subtitles['de'] = [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': subtitle_url,
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'duration': int_or_none(media_info.get('_duration')),
|
||||||
|
'thumbnail': media_info.get('_previewImage'),
|
||||||
|
'is_live': media_info.get('_isLive') is True,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_formats(self, media_info, video_id):
|
||||||
|
type_ = media_info.get('_type')
|
||||||
|
media_array = media_info.get('_mediaArray', [])
|
||||||
|
formats = []
|
||||||
|
for num, media in enumerate(media_array):
|
||||||
|
for stream in media.get('_mediaStreamArray', []):
|
||||||
|
stream_urls = stream.get('_stream')
|
||||||
|
if not stream_urls:
|
||||||
|
continue
|
||||||
|
if not isinstance(stream_urls, list):
|
||||||
|
stream_urls = [stream_urls]
|
||||||
|
quality = stream.get('_quality')
|
||||||
|
server = stream.get('_server')
|
||||||
|
for stream_url in stream_urls:
|
||||||
|
if not url_or_none(stream_url):
|
||||||
|
continue
|
||||||
|
ext = determine_ext(stream_url)
|
||||||
|
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||||
|
continue
|
||||||
|
if ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
update_url_query(stream_url, {
|
||||||
|
'hdcore': '3.1.1',
|
||||||
|
'plugin': 'aasp-3.1.1.69.124'
|
||||||
|
}), video_id, f4m_id='hds', fatal=False))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
else:
|
||||||
|
if server and server.startswith('rtmp'):
|
||||||
|
f = {
|
||||||
|
'url': server,
|
||||||
|
'play_path': stream_url,
|
||||||
|
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
f = {
|
||||||
|
'url': stream_url,
|
||||||
|
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||||
|
}
|
||||||
|
m = re.search(
|
||||||
|
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||||
|
stream_url)
|
||||||
|
if m:
|
||||||
|
f.update({
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
if type_ == 'audio':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
formats.append(f)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
|
||||||
|
class ARDMediathekIE(ARDMediathekBaseIE):
|
||||||
IE_NAME = 'ARD:mediathek'
|
IE_NAME = 'ARD:mediathek'
|
||||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
@ -63,94 +158,6 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||||
|
|
||||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
|
||||||
media_info = self._download_json(
|
|
||||||
media_info_url, video_id, 'Downloading media JSON')
|
|
||||||
|
|
||||||
formats = self._extract_formats(media_info, video_id)
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
if '"fsk"' in webpage:
|
|
||||||
raise ExtractorError(
|
|
||||||
'This video is only available after 20:00', expected=True)
|
|
||||||
elif media_info.get('_geoblocked'):
|
|
||||||
raise ExtractorError('This video is not available due to geo restriction', expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
duration = int_or_none(media_info.get('_duration'))
|
|
||||||
thumbnail = media_info.get('_previewImage')
|
|
||||||
is_live = media_info.get('_isLive') is True
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
subtitle_url = media_info.get('_subtitleUrl')
|
|
||||||
if subtitle_url:
|
|
||||||
subtitles['de'] = [{
|
|
||||||
'ext': 'ttml',
|
|
||||||
'url': subtitle_url,
|
|
||||||
}]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'duration': duration,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'is_live': is_live,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _extract_formats(self, media_info, video_id):
|
|
||||||
type_ = media_info.get('_type')
|
|
||||||
media_array = media_info.get('_mediaArray', [])
|
|
||||||
formats = []
|
|
||||||
for num, media in enumerate(media_array):
|
|
||||||
for stream in media.get('_mediaStreamArray', []):
|
|
||||||
stream_urls = stream.get('_stream')
|
|
||||||
if not stream_urls:
|
|
||||||
continue
|
|
||||||
if not isinstance(stream_urls, list):
|
|
||||||
stream_urls = [stream_urls]
|
|
||||||
quality = stream.get('_quality')
|
|
||||||
server = stream.get('_server')
|
|
||||||
for stream_url in stream_urls:
|
|
||||||
if not url_or_none(stream_url):
|
|
||||||
continue
|
|
||||||
ext = determine_ext(stream_url)
|
|
||||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
|
||||||
continue
|
|
||||||
if ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
update_url_query(stream_url, {
|
|
||||||
'hdcore': '3.1.1',
|
|
||||||
'plugin': 'aasp-3.1.1.69.124'
|
|
||||||
}),
|
|
||||||
video_id, f4m_id='hds', fatal=False))
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
|
||||||
else:
|
|
||||||
if server and server.startswith('rtmp'):
|
|
||||||
f = {
|
|
||||||
'url': server,
|
|
||||||
'play_path': stream_url,
|
|
||||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
f = {
|
|
||||||
'url': stream_url,
|
|
||||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
|
||||||
}
|
|
||||||
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
|
|
||||||
if m:
|
|
||||||
f.update({
|
|
||||||
'width': int(m.group('width')),
|
|
||||||
'height': int(m.group('height')),
|
|
||||||
})
|
|
||||||
if type_ == 'audio':
|
|
||||||
f['vcodec'] = 'none'
|
|
||||||
formats.append(f)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# determine video id from url
|
# determine video id from url
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
@ -242,7 +249,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 14.02.2019
|
# available till 14.02.2019
|
||||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||||
@ -256,6 +263,9 @@ class ARDIE(InfoExtractor):
|
|||||||
'upload_date': '20180214',
|
'upload_date': '20180214',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -302,21 +312,31 @@ class ARDIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ARDBetaMediathekIE(InfoExtractor):
|
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'die-robuste-roswita',
|
'display_id': 'die-robuste-roswita',
|
||||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'id': '70153354',
|
||||||
'title': 'Tatort: Die robuste Roswita',
|
'title': 'Die robuste Roswita',
|
||||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||||
'duration': 5316,
|
'duration': 5316,
|
||||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||||
'upload_date': '20180826',
|
'timestamp': 1577047500,
|
||||||
|
'upload_date': '20191222',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -328,73 +348,75 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
display_id = mobj.group('display_id') or video_id
|
display_id = mobj.group('display_id')
|
||||||
|
if display_id:
|
||||||
|
display_id = display_id.rstrip('/')
|
||||||
|
if not display_id:
|
||||||
|
display_id = video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
player_page = self._download_json(
|
||||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
'https://api.ardmediathek.de/public-gateway',
|
||||||
data = self._parse_json(data_json, display_id)
|
display_id, data=json.dumps({
|
||||||
|
'query': '''{
|
||||||
res = {
|
playerPage(client:"%s", clipId: "%s") {
|
||||||
'id': video_id,
|
blockedByFsk
|
||||||
'display_id': display_id,
|
broadcastedOn
|
||||||
|
maturityContentRating
|
||||||
|
mediaCollection {
|
||||||
|
_duration
|
||||||
|
_geoblocked
|
||||||
|
_isLive
|
||||||
|
_mediaArray {
|
||||||
|
_mediaStreamArray {
|
||||||
|
_quality
|
||||||
|
_server
|
||||||
|
_stream
|
||||||
}
|
}
|
||||||
formats = []
|
}
|
||||||
subtitles = {}
|
_previewImage
|
||||||
geoblocked = False
|
_subtitleUrl
|
||||||
for widget in data.values():
|
_type
|
||||||
if widget.get('_geoblocked') is True:
|
}
|
||||||
geoblocked = True
|
show {
|
||||||
if '_duration' in widget:
|
title
|
||||||
res['duration'] = int_or_none(widget['_duration'])
|
}
|
||||||
if 'clipTitle' in widget:
|
synopsis
|
||||||
res['title'] = widget['clipTitle']
|
title
|
||||||
if '_previewImage' in widget:
|
tracking {
|
||||||
res['thumbnail'] = widget['_previewImage']
|
atiCustomVars {
|
||||||
if 'broadcastedOn' in widget:
|
contentId
|
||||||
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
}
|
||||||
if 'synopsis' in widget:
|
}
|
||||||
res['description'] = widget['synopsis']
|
}
|
||||||
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
}''' % (mobj.group('client'), video_id),
|
||||||
if subtitle_url:
|
}).encode(), headers={
|
||||||
subtitles.setdefault('de', []).append({
|
'Content-Type': 'application/json'
|
||||||
'ext': 'ttml',
|
})['data']['playerPage']
|
||||||
'url': subtitle_url,
|
title = player_page['title']
|
||||||
})
|
content_id = str_or_none(try_get(
|
||||||
if '_quality' in widget:
|
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||||
format_url = url_or_none(try_get(
|
media_collection = player_page.get('mediaCollection') or {}
|
||||||
widget, lambda x: x['_stream']['json'][0]))
|
if not media_collection and content_id:
|
||||||
if not format_url:
|
media_collection = self._download_json(
|
||||||
continue
|
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||||
ext = determine_ext(format_url)
|
content_id, fatal=False) or {}
|
||||||
if ext == 'f4m':
|
info = self._parse_media_info(
|
||||||
formats.extend(self._extract_f4m_formats(
|
media_collection, content_id or video_id,
|
||||||
format_url + '?hdcore=3.11.0',
|
player_page.get('blockedByFsk'))
|
||||||
video_id, f4m_id='hds', fatal=False))
|
age_limit = None
|
||||||
elif ext == 'm3u8':
|
description = player_page.get('synopsis')
|
||||||
formats.extend(self._extract_m3u8_formats(
|
maturity_content_rating = player_page.get('maturityContentRating')
|
||||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
if maturity_content_rating:
|
||||||
fatal=False))
|
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||||
else:
|
if not age_limit and description:
|
||||||
# HTTP formats are not available when geoblocked is True,
|
age_limit = int_or_none(self._search_regex(
|
||||||
# other formats are fine though
|
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||||
if geoblocked:
|
info.update({
|
||||||
continue
|
'age_limit': age_limit,
|
||||||
quality = str_or_none(widget.get('_quality'))
|
'display_id': display_id,
|
||||||
formats.append({
|
'title': title,
|
||||||
'format_id': ('http-' + quality) if quality else 'http',
|
'description': description,
|
||||||
'url': format_url,
|
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||||
'preference': 10, # Plain HTTP, that's nice
|
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||||
})
|
|
||||||
|
|
||||||
if not formats and geoblocked:
|
|
||||||
self.raise_geo_restricted(
|
|
||||||
msg='This video is not available due to geoblocking',
|
|
||||||
countries=['DE'])
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
res.update({
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
})
|
||||||
|
return info
|
||||||
return res
|
|
||||||
|
@ -5,14 +5,12 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import extract_attributes
|
||||||
extract_attributes,
|
|
||||||
remove_end,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AsianCrushIE(InfoExtractor):
|
class AsianCrushIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||||
|
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||||
@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor):
|
|||||||
'id': '1_y4tmjm5r',
|
'id': '1_y4tmjm5r',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Women Who Flirt',
|
'title': 'Women Who Flirt',
|
||||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||||
'timestamp': 1496936429,
|
'timestamp': 1496936429,
|
||||||
'upload_date': '20170608',
|
'upload_date': '20170608',
|
||||||
'uploader_id': 'craig@crifkin.com',
|
'uploader_id': 'craig@crifkin.com',
|
||||||
@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor):
|
|||||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||||
|
|
||||||
player = self._download_webpage(
|
player = self._download_webpage(
|
||||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||||
query={'id': entry_id})
|
query={'id': entry_id})
|
||||||
|
|
||||||
kaltura_id = self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor):
|
|||||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||||
default='513551')
|
default='513551')
|
||||||
|
|
||||||
return self.url_result(
|
description = self._html_search_regex(
|
||||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
webpage, 'description', fatal=False)
|
||||||
video_title=title)
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||||
|
'ie_key': KalturaIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class AsianCrushPlaylistIE(InfoExtractor):
|
class AsianCrushPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12481',
|
'id': '12481',
|
||||||
@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
|||||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||||
},
|
},
|
||||||
'playlist_count': 20,
|
'playlist_count': 20,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
|||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||||
|
|
||||||
title = remove_end(
|
title = self._html_search_regex(
|
||||||
self._html_search_regex(
|
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
'title', default=None) or self._og_search_title(
|
||||||
'title', default=None) or self._og_search_title(
|
webpage, default=None) or self._html_search_meta(
|
||||||
webpage, default=None) or self._html_search_meta(
|
'twitter:title', webpage, 'title',
|
||||||
'twitter:title', webpage, 'title',
|
default=None) or self._search_regex(
|
||||||
default=None) or self._search_regex(
|
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
if title:
|
||||||
' | AsianCrush')
|
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||||
|
|
||||||
description = self._og_search_description(
|
description = self._og_search_description(
|
||||||
webpage, default=None) or self._html_search_meta(
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
@ -1,202 +1,118 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import time
|
|
||||||
import hmac
|
|
||||||
import hashlib
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AtresPlayerIE(InfoExtractor):
|
class AtresPlayerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||||
_NETRC_MACHINE = 'atresplayer'
|
_NETRC_MACHINE = 'atresplayer'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||||
'md5': 'efd56753cda1bb64df52a3074f62e38a',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'capitulo-10-especial-solidario-nochebuena',
|
'id': '5d4aa2c57ed1a88fc715a615',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Especial Solidario de Nochebuena',
|
'title': 'Capítulo 7: Asuntos pendientes',
|
||||||
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
|
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||||
'duration': 5527.6,
|
'duration': 3413,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
'skip': 'This video is only available for registered users'
|
'skip': 'This video is only available for registered users'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
|
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||||
'md5': '6e52cbb513c405e403dbacb7aacf8747',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': 'capitulo-112-david-bustamante',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'David Bustamante',
|
|
||||||
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
|
|
||||||
'duration': 1439.0,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
|
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
_API_BASE = 'https://api.atresplayer.com/'
|
||||||
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
|
|
||||||
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
|
|
||||||
_TIMESTAMP_SHIFT = 30000
|
|
||||||
|
|
||||||
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
|
|
||||||
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
|
|
||||||
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
|
|
||||||
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
|
|
||||||
|
|
||||||
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
|
|
||||||
|
|
||||||
_ERRORS = {
|
|
||||||
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
|
|
||||||
'DELETED': 'This video has expired and is no longer available for online streaming.',
|
|
||||||
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
|
|
||||||
# 'PREMIUM': 'PREMIUM',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
def _handle_error(self, e, code):
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||||
|
error = self._parse_json(e.cause.read(), None)
|
||||||
|
if error.get('error') == 'required_registered':
|
||||||
|
self.raise_login_required()
|
||||||
|
raise ExtractorError(error['error_description'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_form = {
|
self._request_webpage(
|
||||||
'j_username': username,
|
self._API_BASE + 'login', None, 'Downloading login page')
|
||||||
'j_password': password,
|
|
||||||
}
|
|
||||||
|
|
||||||
request = sanitized_Request(
|
try:
|
||||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
target_url = self._download_json(
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
'https://account.atresmedia.com/api/login', None,
|
||||||
response = self._download_webpage(
|
'Logging in', headers={
|
||||||
request, None, 'Logging in')
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
|
}, data=urlencode_postdata({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
}))['targetUrl']
|
||||||
|
except ExtractorError as e:
|
||||||
|
self._handle_error(e, 400)
|
||||||
|
|
||||||
error = self._html_search_regex(
|
self._request_webpage(target_url, None, 'Following Target URL')
|
||||||
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
|
|
||||||
response, 'error', default=None)
|
|
||||||
if error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Unable to login: %s' % error, expected=True)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
try:
|
||||||
|
episode = self._download_json(
|
||||||
|
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
self._handle_error(e, 403)
|
||||||
|
|
||||||
episode_id = self._search_regex(
|
title = episode['titulo']
|
||||||
r'episode="([^"]+)"', webpage, 'episode id')
|
|
||||||
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._PLAYER_URL_TEMPLATE % episode_id,
|
|
||||||
headers={'User-Agent': self._USER_AGENT})
|
|
||||||
player = self._download_json(request, episode_id, 'Downloading player JSON')
|
|
||||||
|
|
||||||
episode_type = player.get('typeOfEpisode')
|
|
||||||
error_message = self._ERRORS.get(episode_type)
|
|
||||||
if error_message:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
video_url = player.get('urlVideo')
|
for source in episode.get('sources', []):
|
||||||
if video_url:
|
src = source.get('src')
|
||||||
format_info = {
|
if not src:
|
||||||
'url': video_url,
|
|
||||||
'format_id': 'http',
|
|
||||||
}
|
|
||||||
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
|
|
||||||
if mobj:
|
|
||||||
format_info.update({
|
|
||||||
'width': int_or_none(mobj.group('width')),
|
|
||||||
'height': int_or_none(mobj.group('height')),
|
|
||||||
'tbr': int_or_none(mobj.group('bitrate')),
|
|
||||||
})
|
|
||||||
formats.append(format_info)
|
|
||||||
|
|
||||||
timestamp = int_or_none(self._download_webpage(
|
|
||||||
self._TIME_API_URL,
|
|
||||||
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
|
||||||
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
|
|
||||||
token = hmac.new(
|
|
||||||
self._MAGIC.encode('ascii'),
|
|
||||||
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
|
|
||||||
).hexdigest()
|
|
||||||
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
|
|
||||||
headers={'User-Agent': self._USER_AGENT})
|
|
||||||
|
|
||||||
fmt_json = self._download_json(
|
|
||||||
request, video_id, 'Downloading windows video JSON')
|
|
||||||
|
|
||||||
result = fmt_json.get('resultDes')
|
|
||||||
if result.lower() != 'ok':
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
|
||||||
|
|
||||||
for format_id, video_url in fmt_json['resultObject'].items():
|
|
||||||
if format_id == 'token' or not video_url.startswith('http'):
|
|
||||||
continue
|
continue
|
||||||
if 'geodeswowsmpra3player' in video_url:
|
src_type = source.get('type')
|
||||||
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
if src_type == 'application/vnd.apple.mpegurl':
|
||||||
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
formats.extend(self._extract_m3u8_formats(
|
||||||
# this videos are protected by DRM, the f4m downloader doesn't support them
|
src, video_id, 'mp4', 'm3u8_native',
|
||||||
continue
|
m3u8_id='hls', fatal=False))
|
||||||
video_url_hd = video_url.replace('free_es', 'es')
|
elif src_type == 'application/dash+xml':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
|
src, video_id, mpd_id='dash', fatal=False))
|
||||||
fatal=False))
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
|
|
||||||
fatal=False))
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
path_data = player.get('pathData')
|
heartbeat = episode.get('heartbeat') or {}
|
||||||
|
omniture = episode.get('omniture') or {}
|
||||||
episode = self._download_xml(
|
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||||
self._EPISODE_URL_TEMPLATE % path_data, video_id,
|
|
||||||
'Downloading episode XML')
|
|
||||||
|
|
||||||
duration = float_or_none(xpath_text(
|
|
||||||
episode, './media/asset/info/technical/contentDuration', 'duration'))
|
|
||||||
|
|
||||||
art = episode.find('./media/asset/info/art')
|
|
||||||
title = xpath_text(art, './name', 'title')
|
|
||||||
description = xpath_text(art, './description', 'description')
|
|
||||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
|
||||||
if subtitle_url:
|
|
||||||
subtitles['es'] = [{
|
|
||||||
'ext': 'srt',
|
|
||||||
'url': subtitle_url,
|
|
||||||
}]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'display_id': display_id,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': episode.get('descripcion'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': episode.get('imgPoster'),
|
||||||
'duration': duration,
|
'duration': int_or_none(episode.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'channel': get_meta('channel'),
|
||||||
|
'season': get_meta('season'),
|
||||||
|
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||||
}
|
}
|
||||||
|
@ -2,22 +2,25 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import float_or_none
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AudioBoomIE(InfoExtractor):
|
class AudioBoomIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
|
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
|
||||||
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
|
'md5': '7b00192e593ff227e6a315486979a42d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4279833',
|
'id': '7398103',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '3/09/2016 Czaban Hour 3',
|
'title': 'Asim Chaudhry',
|
||||||
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
|
'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
|
||||||
'duration': 2245.72,
|
'duration': 4000.99,
|
||||||
'uploader': 'SB Nation A.M.',
|
'uploader': 'Sue Perkins: An hour or so with...',
|
||||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||||
@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor):
|
|||||||
clip = None
|
clip = None
|
||||||
|
|
||||||
clip_store = self._parse_json(
|
clip_store = self._parse_json(
|
||||||
self._search_regex(
|
self._html_search_regex(
|
||||||
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
|
r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
|
||||||
webpage, 'clip store', default='{}', group='json'),
|
webpage, 'clip store', default='{}', group='json'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
if clip_store:
|
if clip_store:
|
||||||
@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor):
|
|||||||
|
|
||||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||||
'audio', webpage, 'audio url')
|
'audio', webpage, 'audio url')
|
||||||
title = from_clip('title') or self._og_search_title(webpage)
|
title = from_clip('title') or self._html_search_meta(
|
||||||
description = from_clip('description') or self._og_search_description(webpage)
|
['og:title', 'og:audio:title', 'audio_title'], webpage)
|
||||||
|
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
|
||||||
|
|
||||||
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
||||||
'weibo:audio:duration', webpage))
|
'weibo:audio:duration', webpage))
|
||||||
|
|
||||||
uploader = from_clip('author') or self._og_search_property(
|
uploader = from_clip('author') or self._html_search_meta(
|
||||||
'audio:artist', webpage, 'uploader', fatal=False)
|
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
|
||||||
uploader_url = from_clip('author_url') or self._html_search_meta(
|
uploader_url = from_clip('author_url') or self._html_search_meta(
|
||||||
'audioboo:channel', webpage, 'uploader url')
|
'audioboo:channel', webpage, 'uploader url')
|
||||||
|
|
||||||
|
@ -47,39 +47,19 @@ class AZMedienIE(InfoExtractor):
|
|||||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
||||||
_PARTNER_ID = '1719221'
|
_PARTNER_ID = '1719221'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
|
||||||
host = mobj.group('host')
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
entry_id = mobj.group('kaltura_id')
|
|
||||||
|
|
||||||
if not entry_id:
|
if not entry_id:
|
||||||
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
entry_id = self._download_json(
|
||||||
payload = {
|
self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
|
||||||
'query': '''query VideoContext($articleId: ID!) {
|
'variables': json.dumps({
|
||||||
article: node(id: $articleId) {
|
'contextId': 'NewsArticle:' + article_id,
|
||||||
... on Article {
|
}),
|
||||||
mainAssetRelation {
|
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||||
asset {
|
|
||||||
... on VideoAsset {
|
|
||||||
kalturaId
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}''',
|
|
||||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
|
||||||
}
|
|
||||||
json_data = self._download_json(
|
|
||||||
api_url, video_id, headers={
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
},
|
|
||||||
data=json.dumps(payload).encode())
|
|
||||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||||
|
@ -1,142 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BambuserIE(InfoExtractor):
|
|
||||||
IE_NAME = 'bambuser'
|
|
||||||
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
|
|
||||||
_API_KEY = '005f64509e19a868399060af746a00aa'
|
|
||||||
_LOGIN_URL = 'https://bambuser.com/user'
|
|
||||||
_NETRC_MACHINE = 'bambuser'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://bambuser.com/v/4050584',
|
|
||||||
# MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
|
|
||||||
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4050584',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Education engineering days - lightning talks',
|
|
||||||
'duration': 3741,
|
|
||||||
'uploader': 'pixelversity',
|
|
||||||
'uploader_id': '344706',
|
|
||||||
'timestamp': 1382976692,
|
|
||||||
'upload_date': '20131028',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# It doesn't respect the 'Range' header, it would download the whole video
|
|
||||||
# caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _login(self):
|
|
||||||
username, password = self._get_login_info()
|
|
||||||
if username is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
login_form = {
|
|
||||||
'form_id': 'user_login',
|
|
||||||
'op': 'Log in',
|
|
||||||
'name': username,
|
|
||||||
'pass': password,
|
|
||||||
}
|
|
||||||
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
|
||||||
request.add_header('Referer', self._LOGIN_URL)
|
|
||||||
response = self._download_webpage(
|
|
||||||
request, None, 'Logging in')
|
|
||||||
|
|
||||||
login_error = self._html_search_regex(
|
|
||||||
r'(?s)<div class="messages error">(.+?)</div>',
|
|
||||||
response, 'login error', default=None)
|
|
||||||
if login_error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Unable to login: %s' % login_error, expected=True)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
|
||||||
self._login()
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
info = self._download_json(
|
|
||||||
'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
|
|
||||||
% (self._API_KEY, video_id), video_id)
|
|
||||||
|
|
||||||
error = info.get('error')
|
|
||||||
if error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
|
||||||
|
|
||||||
result = info['result']
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': result['title'],
|
|
||||||
'url': result['url'],
|
|
||||||
'thumbnail': result.get('preview'),
|
|
||||||
'duration': int_or_none(result.get('length')),
|
|
||||||
'uploader': result.get('username'),
|
|
||||||
'uploader_id': compat_str(result.get('owner', {}).get('uid')),
|
|
||||||
'timestamp': int_or_none(result.get('created')),
|
|
||||||
'fps': float_or_none(result.get('framerate')),
|
|
||||||
'view_count': int_or_none(result.get('views_total')),
|
|
||||||
'comment_count': int_or_none(result.get('comment_count')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class BambuserChannelIE(InfoExtractor):
|
|
||||||
IE_NAME = 'bambuser:channel'
|
|
||||||
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
|
||||||
# The maximum number we can get with each request
|
|
||||||
_STEP = 50
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://bambuser.com/channel/pixelversity',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'pixelversity',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 60,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
user = mobj.group('user')
|
|
||||||
urls = []
|
|
||||||
last_id = ''
|
|
||||||
for i in itertools.count(1):
|
|
||||||
req_url = (
|
|
||||||
'http://bambuser.com/xhr-api/index.php?username={user}'
|
|
||||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
|
||||||
'&method=broadcast&format=json&vid_older_than={last}'
|
|
||||||
).format(user=user, count=self._STEP, last=last_id)
|
|
||||||
req = sanitized_Request(req_url)
|
|
||||||
# Without setting this header, we wouldn't get any result
|
|
||||||
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
|
|
||||||
data = self._download_json(
|
|
||||||
req, user, 'Downloading page %d' % i)
|
|
||||||
results = data['result']
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
last_id = results[-1]['vid']
|
|
||||||
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': user,
|
|
||||||
'entries': urls,
|
|
||||||
}
|
|
@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||||
music/(?:clips|audiovideo/popular)[/#]|
|
music/(?:clips|audiovideo/popular)[/#]|
|
||||||
radio/player/|
|
radio/player/|
|
||||||
|
sounds/play/|
|
||||||
events/[^/]+/play/[^/]+/
|
events/[^/]+/play/[^/]+/
|
||||||
)
|
)
|
||||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b039d07m',
|
'id': 'b039d07m',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
|
'title': 'Kaleidoscope, Leonard Cohen',
|
||||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||||
|
'note': 'Audio',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'm0007jz9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||||
|
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||||
|
'duration': 9840,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -513,7 +528,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
def get_programme_id(item):
|
def get_programme_id(item):
|
||||||
def get_from_attributes(item):
|
def get_from_attributes(item):
|
||||||
for p in('identifier', 'group'):
|
for p in ('identifier', 'group'):
|
||||||
value = item.get(p)
|
value = item.get(p)
|
||||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||||
return value
|
return value
|
||||||
@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'world-europe-32668511',
|
'id': 'world-europe-32668511',
|
||||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
'title': 'Russia stages massive WW2 parade',
|
||||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
|
@ -32,6 +32,10 @@ class BeegIE(InfoExtractor):
|
|||||||
# api/v6 v2
|
# api/v6 v2
|
||||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# api/v6 v2 w/o t
|
||||||
|
'url': 'https://beeg.com/1277207756',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beeg.porn/video/5416503',
|
'url': 'https://beeg.porn/video/5416503',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -49,14 +53,17 @@ class BeegIE(InfoExtractor):
|
|||||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||||
default='1546225636701')
|
default='1546225636701')
|
||||||
|
|
||||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
if len(video_id) >= 10:
|
||||||
t = qs.get('t', [''])[0].split('-')
|
|
||||||
if len(t) > 1:
|
|
||||||
query = {
|
query = {
|
||||||
'v': 2,
|
'v': 2,
|
||||||
's': t[0],
|
|
||||||
'e': t[1],
|
|
||||||
}
|
}
|
||||||
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
t = qs.get('t', [''])[0].split('-')
|
||||||
|
if len(t) > 1:
|
||||||
|
query.update({
|
||||||
|
's': t[0],
|
||||||
|
'e': t[1],
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
query = {'v': 1}
|
query = {'v': 1}
|
||||||
|
|
||||||
|
@ -22,10 +22,11 @@ class BellMediaIE(InfoExtractor):
|
|||||||
bravo|
|
bravo|
|
||||||
mtv|
|
mtv|
|
||||||
space|
|
space|
|
||||||
etalk
|
etalk|
|
||||||
|
marilyn
|
||||||
)\.ca|
|
)\.ca|
|
||||||
much\.com
|
(?:much|cp24)\.com
|
||||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||||
@ -61,6 +62,9 @@ class BellMediaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_DOMAINS = {
|
_DOMAINS = {
|
||||||
'thecomedynetwork': 'comedy',
|
'thecomedynetwork': 'comedy',
|
||||||
@ -70,6 +74,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
'animalplanet': 'aniplan',
|
'animalplanet': 'aniplan',
|
||||||
'etalk': 'ctv',
|
'etalk': 'ctv',
|
||||||
'bnnbloomberg': 'bnn',
|
'bnnbloomberg': 'bnn',
|
||||||
|
'marilyn': 'ctv_marilyn',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
@ -23,7 +24,18 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(InfoExtractor):
|
class BiliBiliIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:(?:www|bangumi)\.)?
|
||||||
|
bilibili\.(?:tv|com)/
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
video/[aA][vV]|
|
||||||
|
anime/(?P<anime_id>\d+)/play\#
|
||||||
|
)(?P<id_bv>\d+)|
|
||||||
|
video/[bB][vV](?P<id>[^/?#&]+)
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||||
@ -91,6 +103,10 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'skip_download': True, # Test metadata only
|
'skip_download': True, # Test metadata only
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
}, {
|
||||||
|
# new BV video id format
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||||
@ -108,7 +124,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id') or mobj.group('id_bv')
|
||||||
anime_id = mobj.group('anime_id')
|
anime_id = mobj.group('anime_id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -306,3 +322,129 @@ class BiliBiliBangumiIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, bangumi_id,
|
entries, bangumi_id,
|
||||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliAudioBaseIE(InfoExtractor):
|
||||||
|
def _call_api(self, path, sid, query=None):
|
||||||
|
if not query:
|
||||||
|
query = {'sid': sid}
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.bilibili.com/audio/music-service-c/web/' + path,
|
||||||
|
sid, query=query)['data']
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.bilibili.com/audio/au1003142',
|
||||||
|
'md5': 'fec4987014ec94ef9e666d4d158ad03b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1003142',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': '【tsukimi】YELLOW / 神山羊',
|
||||||
|
'artist': 'tsukimi',
|
||||||
|
'comment_count': int,
|
||||||
|
'description': 'YELLOW的mp3版!',
|
||||||
|
'duration': 183,
|
||||||
|
'subtitles': {
|
||||||
|
'origin': [{
|
||||||
|
'ext': 'lrc',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
|
'timestamp': 1564836614,
|
||||||
|
'upload_date': '20190803',
|
||||||
|
'uploader': 'tsukimi-つきみぐー',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
au_id = self._match_id(url)
|
||||||
|
|
||||||
|
play_data = self._call_api('url', au_id)
|
||||||
|
formats = [{
|
||||||
|
'url': play_data['cdns'][0],
|
||||||
|
'filesize': int_or_none(play_data.get('size')),
|
||||||
|
}]
|
||||||
|
|
||||||
|
song = self._call_api('song/info', au_id)
|
||||||
|
title = song['title']
|
||||||
|
statistic = song.get('statistic') or {}
|
||||||
|
|
||||||
|
subtitles = None
|
||||||
|
lyric = song.get('lyric')
|
||||||
|
if lyric:
|
||||||
|
subtitles = {
|
||||||
|
'origin': [{
|
||||||
|
'url': lyric,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': au_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'artist': song.get('author'),
|
||||||
|
'comment_count': int_or_none(statistic.get('comment')),
|
||||||
|
'description': song.get('intro'),
|
||||||
|
'duration': int_or_none(song.get('duration')),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'thumbnail': song.get('cover'),
|
||||||
|
'timestamp': int_or_none(song.get('passtime')),
|
||||||
|
'uploader': song.get('uname'),
|
||||||
|
'view_count': int_or_none(statistic.get('play')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.bilibili.com/audio/am10624',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10624',
|
||||||
|
'title': '每日新曲推荐(每日11:00更新)',
|
||||||
|
'description': '每天11:00更新,为你推送最新音乐',
|
||||||
|
},
|
||||||
|
'playlist_count': 19,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
am_id = self._match_id(url)
|
||||||
|
|
||||||
|
songs = self._call_api(
|
||||||
|
'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for song in songs:
|
||||||
|
sid = str_or_none(song.get('id'))
|
||||||
|
if not sid:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'https://www.bilibili.com/audio/au' + sid,
|
||||||
|
BilibiliAudioIE.ie_key(), sid))
|
||||||
|
|
||||||
|
if entries:
|
||||||
|
album_data = self._call_api('menu/info', am_id) or {}
|
||||||
|
album_title = album_data.get('title')
|
||||||
|
if album_title:
|
||||||
|
for entry in entries:
|
||||||
|
entry['album'] = album_title
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, am_id, album_title, album_data.get('intro'))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, am_id)
|
||||||
|
|
||||||
|
|
||||||
|
class BiliBiliPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'http://www.bilibili.tv/video/av%s/' % video_id,
|
||||||
|
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||||
|
@ -6,7 +6,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
remove_end,
|
remove_end,
|
||||||
)
|
)
|
||||||
from .rudo import RudoIE
|
|
||||||
|
|
||||||
|
|
||||||
class BioBioChileTVIE(InfoExtractor):
|
class BioBioChileTVIE(InfoExtractor):
|
||||||
@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
'id': 'b4xd0LK3SK',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': '(none)',
|
# TODO: fix url_transparent information overriding
|
||||||
'upload_date': '20160708',
|
# 'uploader': 'Juan Pablo Echenique',
|
||||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
'title': 'Comentario Oscar Cáceres',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# empty m3u8 manifest
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||||
@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
rudo_url = RudoIE._extract_url(webpage)
|
rudo_url = self._search_regex(
|
||||||
|
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||||
|
webpage, 'embed URL', None, group='url')
|
||||||
if not rudo_url:
|
if not rudo_url:
|
||||||
raise ExtractorError('No videos found')
|
raise ExtractorError('No videos found')
|
||||||
|
|
||||||
@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -3,10 +3,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from ..utils import (
|
from ..compat import (
|
||||||
HEADRequest,
|
compat_b64decode,
|
||||||
int_or_none,
|
compat_urllib_parse_unquote,
|
||||||
)
|
)
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class BIQLEIE(InfoExtractor):
|
class BIQLEIE(InfoExtractor):
|
||||||
@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor):
|
|||||||
if VKIE.suitable(embed_url):
|
if VKIE.suitable(embed_url):
|
||||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||||
|
|
||||||
self._request_webpage(
|
embed_page = self._download_webpage(
|
||||||
HEADRequest(embed_url), video_id, headers={'Referer': url})
|
embed_url, video_id, headers={'Referer': url})
|
||||||
video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
|
video_ext = self._get_cookies(embed_url).get('video_ext')
|
||||||
|
if video_ext:
|
||||||
|
video_ext = compat_urllib_parse_unquote(video_ext.value)
|
||||||
|
if not video_ext:
|
||||||
|
video_ext = compat_b64decode(self._search_regex(
|
||||||
|
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
|
||||||
|
embed_page, 'video_ext')).decode()
|
||||||
|
video_id, sig, _, access_token = video_ext.split(':')
|
||||||
item = self._download_json(
|
item = self._download_json(
|
||||||
'https://api.vk.com/method/video.get', video_id,
|
'https://api.vk.com/method/video.get', video_id,
|
||||||
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
||||||
|
@ -7,6 +7,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -23,6 +24,7 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
|
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Victoria X Rave',
|
'uploader': 'Victoria X Rave',
|
||||||
|
'upload_date': '20170813',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||||
@ -74,12 +76,17 @@ class BitChuteIE(InfoExtractor):
|
|||||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
|
||||||
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
|
|||||||
video = article_data.get('video')
|
video = article_data.get('video')
|
||||||
if video:
|
if video:
|
||||||
video_type = video['type']
|
video_type = video['type']
|
||||||
if video_type == 'cms.bleacherreport.com':
|
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||||
elif video_type == 'ooyala.com':
|
elif video_type == 'ooyala.com':
|
||||||
info['url'] = 'ooyala:%s' % video['id']
|
info['url'] = 'ooyala:%s' % video['id']
|
||||||
@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class BleacherReportCMSIE(AMPIE):
|
class BleacherReportCMSIE(AMPIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||||
@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
|
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||||
info['id'] = video_id
|
info['id'] = video_id
|
||||||
return info
|
return info
|
||||||
|
@ -11,8 +11,8 @@ from ..utils import ExtractorError
|
|||||||
class BokeCCBaseIE(InfoExtractor):
|
class BokeCCBaseIE(InfoExtractor):
|
||||||
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
||||||
player_params_str = self._html_search_regex(
|
player_params_str = self._html_search_regex(
|
||||||
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
|
||||||
webpage, 'player params')
|
webpage, 'player params', group='query')
|
||||||
|
|
||||||
player_params = compat_parse_qs(player_params_str)
|
player_params = compat_parse_qs(player_params_str)
|
||||||
|
|
||||||
@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE):
|
|||||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
|
'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
|
'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'BokeCC Video',
|
'title': 'BokeCC Video',
|
||||||
},
|
},
|
||||||
|
@ -2,43 +2,43 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
compat_HTTPError,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
clean_html,
|
||||||
ExtractorError,
|
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
js_to_json,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
smuggle_url,
|
||||||
|
str_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
UnsupportedError,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
clean_html,
|
url_or_none,
|
||||||
mimetype2ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveLegacyIE(InfoExtractor):
|
class BrightcoveLegacyIE(InfoExtractor):
|
||||||
IE_NAME = 'brightcove:legacy'
|
IE_NAME = 'brightcove:legacy'
|
||||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -55,7 +55,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'timestamp': 1368213670,
|
'timestamp': 1368213670,
|
||||||
'upload_date': '20130510',
|
'upload_date': '20130510',
|
||||||
'uploader_id': '1589608506001',
|
'uploader_id': '1589608506001',
|
||||||
}
|
},
|
||||||
|
'skip': 'The player has been deactivated by the content owner',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||||
@ -70,6 +71,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'upload_date': '20120814',
|
'upload_date': '20120814',
|
||||||
'uploader_id': '1460825906',
|
'uploader_id': '1460825906',
|
||||||
},
|
},
|
||||||
|
'skip': 'video not playable',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||||
@ -79,7 +81,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||||
'uploader': 'Mashable',
|
# 'uploader': 'Mashable',
|
||||||
'timestamp': 1382041798,
|
'timestamp': 1382041798,
|
||||||
'upload_date': '20131017',
|
'upload_date': '20131017',
|
||||||
'uploader_id': '1130468786001',
|
'uploader_id': '1130468786001',
|
||||||
@ -124,6 +126,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'id': '3550319591001',
|
'id': '3550319591001',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
|
'skip': 'Unsupported URL',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||||
@ -133,6 +136,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'title': 'Lesson 08',
|
'title': 'Lesson 08',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 10,
|
'playlist_mincount': 10,
|
||||||
|
'skip': 'Unsupported URL',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playerID inferred from bcpid
|
# playerID inferred from bcpid
|
||||||
@ -141,12 +145,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'only_matching': True, # Tested in GenericIE
|
'only_matching': True, # Tested in GenericIE
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
FLV_VCODECS = {
|
|
||||||
1: 'SORENSON',
|
|
||||||
2: 'ON2',
|
|
||||||
3: 'H264',
|
|
||||||
4: 'VP8',
|
|
||||||
}
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url(cls, object_str):
|
def _build_brighcove_url(cls, object_str):
|
||||||
@ -238,7 +236,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_brightcove_url(cls, params):
|
def _make_brightcove_url(cls, params):
|
||||||
return update_url_query(cls._FEDERATED_URL, params)
|
return update_url_query(
|
||||||
|
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
@ -297,38 +296,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
videoPlayer = query.get('@videoPlayer')
|
videoPlayer = query.get('@videoPlayer')
|
||||||
if videoPlayer:
|
if videoPlayer:
|
||||||
# We set the original url as the default 'Referer' header
|
# We set the original url as the default 'Referer' header
|
||||||
referer = smuggled_data.get('Referer', url)
|
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||||
|
video_id = videoPlayer[0]
|
||||||
if 'playerID' not in query:
|
if 'playerID' not in query:
|
||||||
mobj = re.search(r'/bcpid(\d+)', url)
|
mobj = re.search(r'/bcpid(\d+)', url)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
query['playerID'] = [mobj.group(1)]
|
query['playerID'] = [mobj.group(1)]
|
||||||
return self._get_video_info(
|
|
||||||
videoPlayer[0], query, referer=referer)
|
|
||||||
elif 'playerKey' in query:
|
|
||||||
player_key = query['playerKey']
|
|
||||||
return self._get_playlist_info(player_key[0])
|
|
||||||
else:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
|
||||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
|
||||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query, referer=None):
|
|
||||||
headers = {}
|
|
||||||
linkBase = query.get('linkBaseURL')
|
|
||||||
if linkBase is not None:
|
|
||||||
referer = linkBase[0]
|
|
||||||
if referer is not None:
|
|
||||||
headers['Referer'] = referer
|
|
||||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
|
||||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
|
||||||
'error message', default=None)
|
|
||||||
if error_msg is not None:
|
|
||||||
publisher_id = query.get('publisherId')
|
publisher_id = query.get('publisherId')
|
||||||
if publisher_id and publisher_id[0].isdigit():
|
if publisher_id and publisher_id[0].isdigit():
|
||||||
publisher_id = publisher_id[0]
|
publisher_id = publisher_id[0]
|
||||||
@ -339,6 +312,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
player_id = query.get('playerID')
|
player_id = query.get('playerID')
|
||||||
if player_id and player_id[0].isdigit():
|
if player_id and player_id[0].isdigit():
|
||||||
|
headers = {}
|
||||||
|
if referer:
|
||||||
|
headers['Referer'] = referer
|
||||||
player_page = self._download_webpage(
|
player_page = self._download_webpage(
|
||||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||||
video_id, headers=headers, fatal=False)
|
video_id, headers=headers, fatal=False)
|
||||||
@ -349,136 +325,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
if player_key:
|
if player_key:
|
||||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||||
if publisher_id:
|
if publisher_id:
|
||||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||||
raise ExtractorError(
|
if referer:
|
||||||
'brightcove said: %s' % error_msg, expected=True)
|
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||||
|
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||||
self.report_extraction(video_id)
|
# TODO: figure out if it's possible to extract playlistId from playerKey
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
# elif 'playerKey' in query:
|
||||||
info = json.loads(info)['data']
|
# player_key = query['playerKey']
|
||||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
# return self._get_playlist_info(player_key[0])
|
||||||
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
return self._extract_video_info(video_info)
|
|
||||||
|
|
||||||
def _get_playlist_info(self, player_key):
|
|
||||||
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
|
|
||||||
playlist_info = self._download_webpage(
|
|
||||||
info_url, player_key, 'Downloading playlist information')
|
|
||||||
|
|
||||||
json_data = json.loads(playlist_info)
|
|
||||||
if 'videoList' in json_data:
|
|
||||||
playlist_info = json_data['videoList']
|
|
||||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
|
||||||
elif 'playlistTabs' in json_data:
|
|
||||||
playlist_info = json_data['playlistTabs']
|
|
||||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Empty playlist')
|
|
||||||
|
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
|
||||||
playlist_title=playlist_dto['displayName'])
|
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
|
||||||
video_id = compat_str(video_info['id'])
|
|
||||||
publisher_id = video_info.get('publisherId')
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_info['displayName'].strip(),
|
|
||||||
'description': video_info.get('shortDescription'),
|
|
||||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
|
||||||
'uploader': video_info.get('publisherName'),
|
|
||||||
'uploader_id': compat_str(publisher_id) if publisher_id else None,
|
|
||||||
'duration': float_or_none(video_info.get('length'), 1000),
|
|
||||||
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
|
|
||||||
}
|
|
||||||
|
|
||||||
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
|
|
||||||
if renditions:
|
|
||||||
formats = []
|
|
||||||
for rend in renditions:
|
|
||||||
url = rend['defaultURL']
|
|
||||||
if not url:
|
|
||||||
continue
|
|
||||||
ext = None
|
|
||||||
if rend['remote']:
|
|
||||||
url_comp = compat_urllib_parse_urlparse(url)
|
|
||||||
if url_comp.path.endswith('.m3u8'):
|
|
||||||
formats.extend(
|
|
||||||
self._extract_m3u8_formats(
|
|
||||||
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
elif 'akamaihd.net' in url_comp.netloc:
|
|
||||||
# This type of renditions are served through
|
|
||||||
# akamaihd.net, but they don't use f4m manifests
|
|
||||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
|
||||||
ext = 'flv'
|
|
||||||
if ext is None:
|
|
||||||
ext = determine_ext(url)
|
|
||||||
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
|
||||||
a_format = {
|
|
||||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
|
||||||
'url': url,
|
|
||||||
'ext': ext,
|
|
||||||
'filesize': int_or_none(rend.get('size')) or None,
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
if rend.get('audioOnly'):
|
|
||||||
a_format.update({
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
a_format.update({
|
|
||||||
'height': int_or_none(rend.get('frameHeight')),
|
|
||||||
'width': int_or_none(rend.get('frameWidth')),
|
|
||||||
'vcodec': rend.get('videoCodec'),
|
|
||||||
})
|
|
||||||
|
|
||||||
# m3u8 manifests with remote == false are media playlists
|
|
||||||
# Not calling _extract_m3u8_formats here to save network traffic
|
|
||||||
if ext == 'm3u8':
|
|
||||||
a_format.update({
|
|
||||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'protocol': 'm3u8_native',
|
|
||||||
})
|
|
||||||
|
|
||||||
formats.append(a_format)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
info['formats'] = formats
|
|
||||||
elif video_info.get('FLVFullLengthURL') is not None:
|
|
||||||
info.update({
|
|
||||||
'url': video_info['FLVFullLengthURL'],
|
|
||||||
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
|
|
||||||
'filesize': int_or_none(video_info.get('FLVFullSize')),
|
|
||||||
})
|
|
||||||
|
|
||||||
if self._downloader.params.get('include_ads', False):
|
|
||||||
adServerURL = video_info.get('_youtubedl_adServerURL')
|
|
||||||
if adServerURL:
|
|
||||||
ad_info = {
|
|
||||||
'_type': 'url',
|
|
||||||
'url': adServerURL,
|
|
||||||
}
|
|
||||||
if 'url' in info:
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': info['title'],
|
|
||||||
'entries': [ad_info, info],
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
return ad_info
|
|
||||||
|
|
||||||
if not info.get('url') and not info.get('formats'):
|
|
||||||
uploader_id = info.get('uploader_id')
|
|
||||||
if uploader_id:
|
|
||||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveNewIE(AdobePassIE):
|
class BrightcoveNewIE(AdobePassIE):
|
||||||
@ -570,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
# [2] looks like:
|
# [2] looks like:
|
||||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||||
r'''(?isx)
|
r'''(?isx)
|
||||||
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
(<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||||
(?:.*?
|
(?:.*?
|
||||||
(<script[^>]+
|
(<script[^>]+
|
||||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||||
@ -699,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for text_track in json_data.get('text_tracks', []):
|
for text_track in json_data.get('text_tracks', []):
|
||||||
if text_track.get('src'):
|
if text_track.get('kind') != 'captions':
|
||||||
subtitles.setdefault(text_track.get('srclang'), []).append({
|
continue
|
||||||
'url': text_track['src'],
|
text_track_url = url_or_none(text_track.get('src'))
|
||||||
})
|
if not text_track_url:
|
||||||
|
continue
|
||||||
|
lang = (str_or_none(text_track.get('srclang'))
|
||||||
|
or str_or_none(text_track.get('label')) or 'en').lower()
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': text_track_url,
|
||||||
|
})
|
||||||
|
|
||||||
is_live = False
|
is_live = False
|
||||||
duration = float_or_none(json_data.get('duration'), 1000)
|
duration = float_or_none(json_data.get('duration'), 1000)
|
||||||
@ -732,45 +594,63 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
|
|
||||||
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
|
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
policy_key_id = '%s_%s' % (account_id, player_id)
|
||||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
policy_key = self._downloader.cache.load('brightcove', policy_key_id)
|
||||||
% (account_id, player_id, embed), video_id)
|
policy_key_extracted = False
|
||||||
|
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||||
|
|
||||||
policy_key = None
|
def extract_policy_key():
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||||
|
% (account_id, player_id, embed), video_id)
|
||||||
|
|
||||||
catalog = self._search_regex(
|
policy_key = None
|
||||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
|
||||||
if catalog:
|
catalog = self._search_regex(
|
||||||
catalog = self._parse_json(
|
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||||
js_to_json(catalog), video_id, fatal=False)
|
|
||||||
if catalog:
|
if catalog:
|
||||||
policy_key = catalog.get('policyKey')
|
catalog = self._parse_json(
|
||||||
|
js_to_json(catalog), video_id, fatal=False)
|
||||||
|
if catalog:
|
||||||
|
policy_key = catalog.get('policyKey')
|
||||||
|
|
||||||
if not policy_key:
|
if not policy_key:
|
||||||
policy_key = self._search_regex(
|
policy_key = self._search_regex(
|
||||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||||
webpage, 'policy key', group='pk')
|
webpage, 'policy key', group='pk')
|
||||||
|
|
||||||
|
store_pk(policy_key)
|
||||||
|
return policy_key
|
||||||
|
|
||||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||||
headers = {
|
headers = {}
|
||||||
'Accept': 'application/json;pk=%s' % policy_key,
|
|
||||||
}
|
|
||||||
referrer = smuggled_data.get('referrer')
|
referrer = smuggled_data.get('referrer')
|
||||||
if referrer:
|
if referrer:
|
||||||
headers.update({
|
headers.update({
|
||||||
'Referer': referrer,
|
'Referer': referrer,
|
||||||
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
|
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
|
||||||
})
|
})
|
||||||
try:
|
|
||||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
for _ in range(2):
|
||||||
except ExtractorError as e:
|
if not policy_key:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
policy_key = extract_policy_key()
|
||||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
policy_key_extracted = True
|
||||||
message = json_data.get('message') or json_data['error_code']
|
headers['Accept'] = 'application/json;pk=%s' % policy_key
|
||||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
try:
|
||||||
self.raise_geo_restricted(msg=message)
|
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||||
raise ExtractorError(message, expected=True)
|
break
|
||||||
raise
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||||
|
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||||
|
message = json_data.get('message') or json_data['error_code']
|
||||||
|
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||||
|
self.raise_geo_restricted(msg=message)
|
||||||
|
elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
|
||||||
|
policy_key = None
|
||||||
|
store_pk(None)
|
||||||
|
continue
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
errors = json_data.get('errors')
|
errors = json_data.get('errors')
|
||||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||||
|
@ -9,21 +9,26 @@ class BusinessInsiderIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
|
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
|
||||||
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
|
'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'hZRllCfw',
|
'id': 'cjGDb0X9',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Here's how much radiation you're exposed to in everyday life",
|
'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
|
||||||
'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
|
'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
|
||||||
'upload_date': '20170709',
|
'upload_date': '20160611',
|
||||||
'timestamp': 1499606400,
|
'timestamp': 1465675620,
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
|
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
|
||||||
'only_matching': True,
|
'md5': '43f438dbc6da0b89f5ac42f68529d84a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5zJwd4FK',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
|
||||||
|
'description': 'md5:2af8975825d38a4fed24717bbe51db49',
|
||||||
|
'upload_date': '20170705',
|
||||||
|
'timestamp': 1499270528,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -35,7 +40,8 @@ class BusinessInsiderIE(InfoExtractor):
|
|||||||
jwplatform_id = self._search_regex(
|
jwplatform_id = self._search_regex(
|
||||||
(r'data-media-id=["\']([a-zA-Z0-9]{8})',
|
(r'data-media-id=["\']([a-zA-Z0-9]{8})',
|
||||||
r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
|
r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
|
||||||
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
|
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
|
||||||
|
r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
|
||||||
webpage, 'jwplatform id')
|
webpage, 'jwplatform id')
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
||||||
|
@ -3,7 +3,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_duration
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BYUtvIE(InfoExtractor):
|
class BYUtvIE(InfoExtractor):
|
||||||
@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = mobj.group('display_id') or video_id
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
info = self._download_json(
|
video = self._download_json(
|
||||||
'https://api.byutv.org/api3/catalog/getvideosforcontent',
|
'https://api.byutv.org/api3/catalog/getvideosforcontent',
|
||||||
display_id, query={
|
display_id, query={
|
||||||
'contentid': video_id,
|
'contentid': video_id,
|
||||||
@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
|
|||||||
'x-byutv-platformkey': 'xsaaw9c7y5',
|
'x-byutv-platformkey': 'xsaaw9c7y5',
|
||||||
})
|
})
|
||||||
|
|
||||||
ep = info.get('ooyalaVOD')
|
ep = video.get('ooyalaVOD')
|
||||||
if ep:
|
if ep:
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
|
|||||||
'thumbnail': ep.get('imageThumbnail'),
|
'thumbnail': ep.get('imageThumbnail'),
|
||||||
}
|
}
|
||||||
|
|
||||||
ep = info['dvr']
|
info = {}
|
||||||
title = ep['title']
|
formats = []
|
||||||
formats = self._extract_m3u8_formats(
|
for format_id, ep in video.items():
|
||||||
ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
|
if not isinstance(ep, dict):
|
||||||
m3u8_id='hls')
|
continue
|
||||||
|
video_url = url_or_none(ep.get('videoUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
merge_dicts(info, {
|
||||||
|
'title': ep.get('title'),
|
||||||
|
'description': ep.get('description'),
|
||||||
|
'thumbnail': ep.get('imageThumbnail'),
|
||||||
|
'duration': parse_duration(ep.get('length')),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return {
|
|
||||||
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': display_id,
|
||||||
'description': ep.get('description'),
|
|
||||||
'thumbnail': ep.get('imageThumbnail'),
|
|
||||||
'duration': parse_duration(ep.get('length')),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
})
|
||||||
|
@ -13,6 +13,8 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -20,15 +22,15 @@ class CanvasIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Nachtwacht: De Greystook',
|
'title': 'Nachtwacht: De Greystook',
|
||||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
'description': 'Nachtwacht: De Greystook',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 1468.03,
|
'duration': 1468.04,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||||
}, {
|
}, {
|
||||||
@ -39,23 +41,45 @@ class CanvasIE(InfoExtractor):
|
|||||||
'HLS': 'm3u8_native',
|
'HLS': 'm3u8_native',
|
||||||
'HLS_AES': 'm3u8',
|
'HLS_AES': 'm3u8',
|
||||||
}
|
}
|
||||||
|
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||||
|
|
||||||
|
# Old API endpoint, serves more formats but may fail for some videos
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||||
% (site_id, video_id), video_id)
|
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||||
|
'Unable to download asset JSON', fatal=False)
|
||||||
|
|
||||||
|
# New API endpoint
|
||||||
|
if not data:
|
||||||
|
token = self._download_json(
|
||||||
|
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||||
|
'Downloading token', data=b'',
|
||||||
|
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
||||||
|
data = self._download_json(
|
||||||
|
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||||
|
video_id, 'Downloading video JSON', fatal=False, query={
|
||||||
|
'vrtPlayerToken': token,
|
||||||
|
'client': '%s@PROD' % site_id,
|
||||||
|
}, expected_status=400)
|
||||||
|
message = data.get('message')
|
||||||
|
if message and not data.get('title'):
|
||||||
|
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
||||||
|
self.raise_login_required(message)
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
title = data['title']
|
title = data['title']
|
||||||
description = data.get('description')
|
description = data.get('description')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for target in data['targetUrls']:
|
for target in data['targetUrls']:
|
||||||
format_url, format_type = target.get('url'), target.get('type')
|
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
|
||||||
if not format_url or not format_type:
|
if not format_url or not format_type:
|
||||||
continue
|
continue
|
||||||
|
format_type = format_type.upper()
|
||||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||||
@ -134,20 +158,20 @@ class CanvasEenIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Pagina niet gevonden',
|
'skip': 'Pagina niet gevonden',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
|
||||||
'display_id': 'herbekijk-sorry-voor-alles',
|
'display_id': 'emma-pakt-thilly-aan',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Herbekijk Sorry voor alles',
|
'title': 'Emma pakt Thilly aan',
|
||||||
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 3788.06,
|
'duration': 118.24,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Episode no longer available',
|
'expected_warnings': ['is not a supported codec'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -183,19 +207,44 @@ class VrtNUIE(GigyaBaseIE):
|
|||||||
IE_DESC = 'VrtNU.be'
|
IE_DESC = 'VrtNU.be'
|
||||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Available via old API endpoint
|
||||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'De zwarte weduwe',
|
'title': 'De zwarte weduwe',
|
||||||
'description': 'md5:d90c21dced7db869a85db89a623998d4',
|
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
||||||
'duration': 1457.04,
|
'duration': 1457.04,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'season': '1',
|
'season': 'Season 1',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
},
|
},
|
||||||
'skip': 'This video is only available for registered users'
|
'skip': 'This video is only available for registered users',
|
||||||
|
'params': {
|
||||||
|
'username': '<snip>',
|
||||||
|
'password': '<snip>',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['is not a supported codec'],
|
||||||
|
}, {
|
||||||
|
# Only available via new API endpoint
|
||||||
|
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Aflevering 5',
|
||||||
|
'description': 'Wie valt door de mand tijdens een missie?',
|
||||||
|
'duration': 2967.06,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 5,
|
||||||
|
},
|
||||||
|
'skip': 'This video is only available for registered users',
|
||||||
|
'params': {
|
||||||
|
'username': '<snip>',
|
||||||
|
'password': '<snip>',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||||
}]
|
}]
|
||||||
_NETRC_MACHINE = 'vrtnu'
|
_NETRC_MACHINE = 'vrtnu'
|
||||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -216,6 +218,29 @@ class CBCWatchBaseIE(InfoExtractor):
|
|||||||
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
||||||
}
|
}
|
||||||
_GEO_COUNTRIES = ['CA']
|
_GEO_COUNTRIES = ['CA']
|
||||||
|
_LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
|
||||||
|
_TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
|
||||||
|
_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
|
||||||
|
_NETRC_MACHINE = 'cbcwatch'
|
||||||
|
|
||||||
|
def _signature(self, email, password):
|
||||||
|
data = json.dumps({
|
||||||
|
'email': email,
|
||||||
|
'password': password,
|
||||||
|
}).encode()
|
||||||
|
headers = {'content-type': 'application/json'}
|
||||||
|
query = {'apikey': self._API_KEY}
|
||||||
|
resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
|
||||||
|
access_token = resp['access_token']
|
||||||
|
|
||||||
|
# token
|
||||||
|
query = {
|
||||||
|
'access_token': access_token,
|
||||||
|
'apikey': self._API_KEY,
|
||||||
|
'jwtapp': 'jwt',
|
||||||
|
}
|
||||||
|
resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
|
||||||
|
return resp['signature']
|
||||||
|
|
||||||
def _call_api(self, path, video_id):
|
def _call_api(self, path, video_id):
|
||||||
url = path if path.startswith('http') else self._API_BASE_URL + path
|
url = path if path.startswith('http') else self._API_BASE_URL + path
|
||||||
@ -239,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor):
|
|||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._valid_device_token():
|
if self._valid_device_token():
|
||||||
return
|
return
|
||||||
device = self._downloader.cache.load('cbcwatch', 'device') or {}
|
device = self._downloader.cache.load(
|
||||||
|
'cbcwatch', self._cache_device_key()) or {}
|
||||||
self._device_id, self._device_token = device.get('id'), device.get('token')
|
self._device_id, self._device_token = device.get('id'), device.get('token')
|
||||||
if self._valid_device_token():
|
if self._valid_device_token():
|
||||||
return
|
return
|
||||||
@ -248,16 +274,30 @@ class CBCWatchBaseIE(InfoExtractor):
|
|||||||
def _valid_device_token(self):
|
def _valid_device_token(self):
|
||||||
return self._device_id and self._device_token
|
return self._device_id and self._device_token
|
||||||
|
|
||||||
|
def _cache_device_key(self):
|
||||||
|
email, _ = self._get_login_info()
|
||||||
|
return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
|
||||||
|
|
||||||
def _register_device(self):
|
def _register_device(self):
|
||||||
self._device_id = self._device_token = None
|
|
||||||
result = self._download_xml(
|
result = self._download_xml(
|
||||||
self._API_BASE_URL + 'device/register',
|
self._API_BASE_URL + 'device/register',
|
||||||
None, 'Acquiring device token',
|
None, 'Acquiring device token',
|
||||||
data=b'<device><type>web</type></device>')
|
data=b'<device><type>web</type></device>')
|
||||||
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
||||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
email, password = self._get_login_info()
|
||||||
|
if email and password:
|
||||||
|
signature = self._signature(email, password)
|
||||||
|
data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
|
||||||
|
escape(signature), escape(self._device_id)).encode()
|
||||||
|
url = self._API_BASE_URL + 'device/login'
|
||||||
|
result = self._download_xml(
|
||||||
|
url, None, data=data,
|
||||||
|
headers={'content-type': 'application/xml'})
|
||||||
|
self._device_token = xpath_text(result, 'token', fatal=True)
|
||||||
|
else:
|
||||||
|
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||||
self._downloader.cache.store(
|
self._downloader.cache.store(
|
||||||
'cbcwatch', 'device', {
|
'cbcwatch', self._cache_device_key(), {
|
||||||
'id': self._device_id,
|
'id': self._device_id,
|
||||||
'token': self._device_token,
|
'token': self._device_token,
|
||||||
})
|
})
|
||||||
|
@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
is_live = item.get('type') == 'LIVE'
|
is_live = item.get('type') == 'LIVE'
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||||
|
if 'drmOnly=true' in stream_url:
|
||||||
|
continue
|
||||||
if 'playerType=flash' in stream_url:
|
if 'playerType=flash' in stream_url:
|
||||||
stream_formats = self._extract_m3u8_formats(
|
stream_formats = self._extract_m3u8_formats(
|
||||||
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor):
|
|||||||
'upload_date': '20130828',
|
'upload_date': '20130828',
|
||||||
'session_code': 'KOS002',
|
'session_code': 'KOS002',
|
||||||
'session_room': 'Arena 1A',
|
'session_room': 'Arena 1A',
|
||||||
'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
|
'session_speakers': 'count:5',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
@ -64,15 +64,15 @@ class Channel9IE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
|
||||||
'title': 'Channel 9',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 100,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Events/DEVintersection/DEVintersection-2016',
|
||||||
|
'title': 'DEVintersection 2016 Orlando Sessions',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
||||||
@ -112,11 +112,11 @@ class Channel9IE(InfoExtractor):
|
|||||||
episode_data), content_path)
|
episode_data), content_path)
|
||||||
content_id = episode_data['contentId']
|
content_id = episode_data['contentId']
|
||||||
is_session = '/Sessions(' in episode_data['api']
|
is_session = '/Sessions(' in episode_data['api']
|
||||||
content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
|
content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
|
||||||
if is_session:
|
if is_session:
|
||||||
content_url += '?$expand=Speakers'
|
content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
|
||||||
else:
|
else:
|
||||||
content_url += '?$expand=Authors'
|
content_url += 'Authors,Body&$expand=Authors'
|
||||||
content_data = self._download_json(content_url, content_id)
|
content_data = self._download_json(content_url, content_id)
|
||||||
title = content_data['Title']
|
title = content_data['Title']
|
||||||
|
|
||||||
@ -210,7 +210,7 @@ class Channel9IE(InfoExtractor):
|
|||||||
'id': content_id,
|
'id': content_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
|
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
|
||||||
'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
|
'thumbnail': content_data.get('VideoPlayerPreviewImage'),
|
||||||
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
|
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
|
||||||
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
|
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
|
||||||
'avg_rating': int_or_none(content_data.get('Rating')),
|
'avg_rating': int_or_none(content_data.get('Rating')),
|
||||||
|
@ -3,11 +3,15 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
lowercase_escape,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ChaturbateIE(InfoExtractor):
|
class ChaturbateIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.chaturbate.com/siswet19/',
|
'url': 'https://www.chaturbate.com/siswet19/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -21,6 +25,9 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Room is offline',
|
'skip': 'Room is offline',
|
||||||
|
}, {
|
||||||
|
'url': 'https://chaturbate.com/fullvideo/?b=caylin',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://en.chaturbate.com/siswet19/',
|
'url': 'https://en.chaturbate.com/siswet19/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -32,14 +39,34 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, video_id, headers=self.geo_verification_headers())
|
'https://chaturbate.com/%s/' % video_id, video_id,
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
|
||||||
|
found_m3u8_urls = []
|
||||||
|
|
||||||
|
data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'data', default='{}', group='value'),
|
||||||
|
video_id, transform_source=lowercase_escape, fatal=False)
|
||||||
|
if data:
|
||||||
|
m3u8_url = url_or_none(data.get('hls_source'))
|
||||||
|
if m3u8_url:
|
||||||
|
found_m3u8_urls.append(m3u8_url)
|
||||||
|
|
||||||
|
if not found_m3u8_urls:
|
||||||
|
for m in re.finditer(
|
||||||
|
r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
|
||||||
|
found_m3u8_urls.append(lowercase_escape(m.group('url')))
|
||||||
|
|
||||||
|
if not found_m3u8_urls:
|
||||||
|
for m in re.finditer(
|
||||||
|
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
|
||||||
|
found_m3u8_urls.append(m.group('url'))
|
||||||
|
|
||||||
m3u8_urls = []
|
m3u8_urls = []
|
||||||
|
for found_m3u8_url in found_m3u8_urls:
|
||||||
for m in re.finditer(
|
m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
|
||||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
|
|
||||||
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
|
|
||||||
'url').replace('_fast', '')
|
|
||||||
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
|
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
|
||||||
if m3u8_url not in m3u8_urls:
|
if m3u8_url not in m3u8_urls:
|
||||||
m3u8_urls.append(m3u8_url)
|
m3u8_urls.append(m3u8_url)
|
||||||
@ -59,7 +86,12 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for m3u8_url in m3u8_urls:
|
for m3u8_url in m3u8_urls:
|
||||||
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
|
for known_id in ('fast', 'slow'):
|
||||||
|
if '_%s' % known_id in m3u8_url:
|
||||||
|
m3u8_id = known_id
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
m3u8_id = None
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, ext='mp4',
|
m3u8_url, video_id, ext='mp4',
|
||||||
# ffmpeg skips segments for fast m3u8
|
# ffmpeg skips segments for fast m3u8
|
||||||
|
@ -1,20 +1,24 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class CloudflareStreamIE(InfoExtractor):
|
class CloudflareStreamIE(InfoExtractor):
|
||||||
|
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||||
|
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||||
|
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/|
|
(?:watch\.)?%s/|
|
||||||
embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=
|
%s
|
||||||
)
|
)
|
||||||
(?P<id>[\da-f]+)
|
(?P<id>%s)
|
||||||
'''
|
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -41,23 +45,28 @@ class CloudflareStreamIE(InfoExtractor):
|
|||||||
return [
|
return [
|
||||||
mobj.group('url')
|
mobj.group('url')
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
|
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
|
||||||
|
base_url = 'https://%s/%s/' % (domain, video_id)
|
||||||
|
if '.' in video_id:
|
||||||
|
video_id = self._parse_json(base64.urlsafe_b64decode(
|
||||||
|
video_id.split('.')[1]), video_id)['sub']
|
||||||
|
manifest_base_url = base_url + 'manifest/video.'
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
|
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
|
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
fatal=False)
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
|
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||||
video_id, mpd_id='dash', fatal=False))
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_id,
|
'title': video_id,
|
||||||
|
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -1,74 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ComCarCoffIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2494164',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20141127',
|
|
||||||
'timestamp': 1417107600,
|
|
||||||
'duration': 1232,
|
|
||||||
'title': 'Happy Thanksgiving Miranda',
|
|
||||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'requires ffmpeg',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
if not display_id:
|
|
||||||
display_id = 'comediansincarsgettingcoffee.com'
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
full_data = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
|
||||||
display_id)['videoData']
|
|
||||||
|
|
||||||
display_id = full_data['activeVideo']['video']
|
|
||||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
|
||||||
|
|
||||||
video_id = compat_str(video_data['mediaId'])
|
|
||||||
title = video_data['title']
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_data['mediaUrl'], video_id, 'mp4')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': video_data['images']['thumb'],
|
|
||||||
}, {
|
|
||||||
'url': video_data['images']['poster'],
|
|
||||||
}]
|
|
||||||
|
|
||||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
|
||||||
video_data.get('pubDate'))
|
|
||||||
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
|
|
||||||
video_data.get('duration'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'formats': formats,
|
|
||||||
'season_number': int_or_none(video_data.get('season')),
|
|
||||||
'episode_number': int_or_none(video_data.get('episode')),
|
|
||||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
|
||||||
}
|
|
@ -10,12 +10,13 @@ import os
|
|||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar,
|
compat_cookiejar_Cookie,
|
||||||
compat_cookies,
|
compat_cookies,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
@ -67,6 +68,7 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -220,7 +222,7 @@ class InfoExtractor(object):
|
|||||||
* "preference" (optional, int) - quality of the image
|
* "preference" (optional, int) - quality of the image
|
||||||
* "width" (optional, int)
|
* "width" (optional, int)
|
||||||
* "height" (optional, int)
|
* "height" (optional, int)
|
||||||
* "resolution" (optional, string "{width}x{height"},
|
* "resolution" (optional, string "{width}x{height}",
|
||||||
deprecated)
|
deprecated)
|
||||||
* "filesize" (optional, int)
|
* "filesize" (optional, int)
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
@ -623,9 +625,12 @@ class InfoExtractor(object):
|
|||||||
url_or_request = update_url_query(url_or_request, query)
|
url_or_request = update_url_query(url_or_request, query)
|
||||||
if data is not None or headers:
|
if data is not None or headers:
|
||||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||||
|
exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
|
||||||
|
if hasattr(ssl, 'CertificateError'):
|
||||||
|
exceptions.append(ssl.CertificateError)
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except tuple(exceptions) as err:
|
||||||
if isinstance(err, compat_urllib_error.HTTPError):
|
if isinstance(err, compat_urllib_error.HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
# Retain reference to error to prevent file object from
|
# Retain reference to error to prevent file object from
|
||||||
@ -1182,16 +1187,33 @@ class InfoExtractor(object):
|
|||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_ld = self._search_regex(
|
json_ld_list = list(re.finditer(JSON_LD_RE, html))
|
||||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
|
||||||
default = kwargs.get('default', NO_DEFAULT)
|
default = kwargs.get('default', NO_DEFAULT)
|
||||||
if not json_ld:
|
|
||||||
return default if default is not NO_DEFAULT else {}
|
|
||||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||||
# At the same time `default` may be passed that assumes `fatal=False`
|
# At the same time `default` may be passed that assumes `fatal=False`
|
||||||
# for _search_regex. Let's simulate the same behavior here as well.
|
# for _search_regex. Let's simulate the same behavior here as well.
|
||||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||||
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
json_ld = []
|
||||||
|
for mobj in json_ld_list:
|
||||||
|
json_ld_item = self._parse_json(
|
||||||
|
mobj.group('json_ld'), video_id, fatal=fatal)
|
||||||
|
if not json_ld_item:
|
||||||
|
continue
|
||||||
|
if isinstance(json_ld_item, dict):
|
||||||
|
json_ld.append(json_ld_item)
|
||||||
|
elif isinstance(json_ld_item, (list, tuple)):
|
||||||
|
json_ld.extend(json_ld_item)
|
||||||
|
if json_ld:
|
||||||
|
json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||||
|
if json_ld:
|
||||||
|
return json_ld
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
elif fatal:
|
||||||
|
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||||
|
return {}
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
@ -1227,7 +1249,10 @@ class InfoExtractor(object):
|
|||||||
interaction_type = is_e.get('interactionType')
|
interaction_type = is_e.get('interactionType')
|
||||||
if not isinstance(interaction_type, compat_str):
|
if not isinstance(interaction_type, compat_str):
|
||||||
continue
|
continue
|
||||||
interaction_count = int_or_none(is_e.get('userInteractionCount'))
|
# For interaction count some sites provide string instead of
|
||||||
|
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||||
|
# so extracting count with more relaxed str_to_int
|
||||||
|
interaction_count = str_to_int(is_e.get('userInteractionCount'))
|
||||||
if interaction_count is None:
|
if interaction_count is None:
|
||||||
continue
|
continue
|
||||||
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
||||||
@ -1247,6 +1272,7 @@ class InfoExtractor(object):
|
|||||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
|
'uploader': str_or_none(e.get('author')),
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
@ -1256,10 +1282,10 @@ class InfoExtractor(object):
|
|||||||
extract_interaction_statistic(e)
|
extract_interaction_statistic(e)
|
||||||
|
|
||||||
for e in json_ld:
|
for e in json_ld:
|
||||||
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
|
if '@context' in e:
|
||||||
item_type = e.get('@type')
|
item_type = e.get('@type')
|
||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
return info
|
continue
|
||||||
if item_type in ('TVEpisode', 'Episode'):
|
if item_type in ('TVEpisode', 'Episode'):
|
||||||
episode_name = unescapeHTML(e.get('name'))
|
episode_name = unescapeHTML(e.get('name'))
|
||||||
info.update({
|
info.update({
|
||||||
@ -1293,11 +1319,17 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
continue
|
if expected_type is None:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
break
|
||||||
video = e.get('video')
|
video = e.get('video')
|
||||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||||
extract_video_object(video)
|
extract_video_object(video)
|
||||||
break
|
if expected_type is None:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -1424,12 +1456,10 @@ class InfoExtractor(object):
|
|||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError as e:
|
except ExtractorError:
|
||||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
self.to_screen(
|
||||||
self.to_screen(
|
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
return False
|
||||||
return False
|
|
||||||
raise
|
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
""" Either "http:" or "https:", depending on the user's preferences """
|
""" Either "http:" or "https:", depending on the user's preferences """
|
||||||
@ -1457,14 +1487,14 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||||
fatal=True, m3u8_id=None):
|
fatal=True, m3u8_id=None, data=None, headers={}, query={}):
|
||||||
manifest = self._download_xml(
|
manifest = self._download_xml(
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest',
|
'Unable to download f4m manifest',
|
||||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||||
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
|
||||||
transform_source=transform_source,
|
transform_source=transform_source,
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
if manifest is False:
|
if manifest is False:
|
||||||
return []
|
return []
|
||||||
@ -1588,12 +1618,13 @@ class InfoExtractor(object):
|
|||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None, note=None, errnote=None,
|
m3u8_id=None, note=None, errnote=None,
|
||||||
fatal=True, live=False):
|
fatal=True, live=False, data=None, headers={},
|
||||||
|
query={}):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note=note or 'Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
errnote=errnote or 'Failed to download m3u8 information',
|
errnote=errnote or 'Failed to download m3u8 information',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
@ -1767,6 +1798,19 @@ class InfoExtractor(object):
|
|||||||
# the same GROUP-ID
|
# the same GROUP-ID
|
||||||
f['acodec'] = 'none'
|
f['acodec'] = 'none'
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
# for DailyMotion
|
||||||
|
progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
|
||||||
|
if progressive_uri:
|
||||||
|
http_f = f.copy()
|
||||||
|
del http_f['manifest_url']
|
||||||
|
http_f.update({
|
||||||
|
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||||
|
'protocol': 'http',
|
||||||
|
'url': progressive_uri,
|
||||||
|
})
|
||||||
|
formats.append(http_f)
|
||||||
|
|
||||||
last_stream_inf = {}
|
last_stream_inf = {}
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
@ -2011,12 +2055,12 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note=note or 'Downloading MPD manifest',
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote=errnote or 'Failed to download MPD manifest',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
mpd_doc, urlh = res
|
mpd_doc, urlh = res
|
||||||
@ -2319,15 +2363,17 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
ism_url, video_id,
|
ism_url, video_id,
|
||||||
note=note or 'Downloading ISM manifest',
|
note=note or 'Downloading ISM manifest',
|
||||||
errnote=errnote or 'Failed to download ISM manifest',
|
errnote=errnote or 'Failed to download ISM manifest',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
ism_doc, urlh = res
|
ism_doc, urlh = res
|
||||||
|
if ism_doc is None:
|
||||||
|
return []
|
||||||
|
|
||||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||||
|
|
||||||
@ -2691,7 +2737,7 @@ class InfoExtractor(object):
|
|||||||
entry = {
|
entry = {
|
||||||
'id': this_video_id,
|
'id': this_video_id,
|
||||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||||
'description': video_data.get('description'),
|
'description': clean_html(video_data.get('description')),
|
||||||
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||||
@ -2806,7 +2852,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||||
path='/', secure=False, discard=False, rest={}, **kwargs):
|
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||||
cookie = compat_cookiejar.Cookie(
|
cookie = compat_cookiejar_Cookie(
|
||||||
0, name, value, port, port is not None, domain, True,
|
0, name, value, port, port is not None, domain, True,
|
||||||
domain.startswith('.'), path, True, secure, expire_time,
|
domain.startswith('.'), path, True, secure, expire_time,
|
||||||
discard, None, None, rest)
|
discard, None, None, rest)
|
||||||
|
118
youtube_dl/extractor/contv.py
Normal file
118
youtube_dl/extractor/contv.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CONtvIE(InfoExtractor):
    """Extractor for contv.com ``details-movie`` pages.

    A page id resolves either to a single video or, when the metadata
    service reports ``type == 'episodic'``, to a series whose episodes are
    returned as a playlist of further ``details-movie`` URLs.
    """
    _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
        'info_dict': {
            'id': 'CEG10022949',
            'ext': 'mp4',
            'title': 'Days Of Thrills & Laughter',
            'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
            'upload_date': '20180703',
            'timestamp': 1530634789.61,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
        'info_dict': {
            'id': 'CLIP-show_fotld_bts',
            'title': 'Fight of the Living Dead: Behind the Scenes Bites',
        },
        'playlist_mincount': 7,
    }]

    def _real_extract(self, url):
        """Return a video info dict, or a playlist result for a series."""
        video_id = self._match_id(url)
        details = self._download_json(
            'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
            video_id, query={'device': 'web'})

        if details.get('type') == 'episodic':
            return self._extract_series(video_id, details)

        m_details = details['details']
        title = details['title']

        formats = []

        media_hls_url = m_details.get('media_hls_url')
        if media_hls_url:
            # Non-fatal: the progressive MP4 below may still be usable.
            formats.extend(self._extract_m3u8_formats(
                media_hls_url, video_id, 'mp4',
                m3u8_id='hls', fatal=False))

        media_mp4_url = m_details.get('media_mp4_url')
        if media_mp4_url:
            formats.append({
                'format_id': 'http',
                'url': media_mp4_url,
            })

        self._sort_formats(formats)

        # All caption tracks are filed under 'en', whatever their key is.
        subtitles = {}
        for caption_url in (m_details.get('captions') or {}).values():
            if not caption_url:
                # An entry without a URL cannot be downloaded.
                continue
            subtitles.setdefault('en', []).append({
                'url': caption_url
            })

        # `or []` rather than `.get(key, [])`: the key may be present but
        # null, the same way `captions` is handled above.
        thumbnails = []
        for image in m_details.get('images') or []:
            image_url = image.get('url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            })

        # Use the first non-empty description, tried in the order
        # large, medium, small, unprefixed.
        description = None
        for prefix in ('large_', 'medium_', 'small_', ''):
            candidate = m_details.get(prefix + 'description')
            if candidate:
                description = candidate
                break

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            # Both values are divided by 1000 via float_or_none's scale.
            'timestamp': float_or_none(details.get('metax_added_on'), 1000),
            'subtitles': subtitles,
            'duration': float_or_none(m_details.get('duration'), 1000),
            'view_count': int_or_none(details.get('num_watched')),
            'like_count': int_or_none(details.get('num_fav')),
            'categories': details.get('category'),
            'tags': details.get('tags'),
            'season_number': int_or_none(details.get('season')),
            'episode_number': int_or_none(details.get('episode')),
            'release_year': int_or_none(details.get('pub_year')),
        }

    def _extract_series(self, video_id, details):
        """Build a playlist with one URL entry per episode of a series."""
        seasons = self._download_json(
            'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
            video_id)
        entries = []
        for season in seasons:
            # A season may carry `"episodes": null`; treat it as empty.
            for episode in season.get('episodes') or []:
                episode_id = episode.get('id')
                if not episode_id:
                    continue
                entries.append(self.url_result(
                    'https://www.contv.com/details-movie/' + episode_id,
                    CONtvIE.ie_key(), episode_id))
        return self.playlist_result(entries, video_id, details.get('title'))
|
@ -4,7 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformFeedIE
|
from .theplatform import ThePlatformFeedIE
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CorusIE(ThePlatformFeedIE):
|
class CorusIE(ThePlatformFeedIE):
|
||||||
@ -12,24 +17,49 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?P<domain>
|
(?P<domain>
|
||||||
(?:globaltv|etcanada)\.com|
|
(?:
|
||||||
(?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca
|
globaltv|
|
||||||
|
etcanada|
|
||||||
|
seriesplus|
|
||||||
|
wnetwork|
|
||||||
|
ytv
|
||||||
|
)\.com|
|
||||||
|
(?:
|
||||||
|
hgtv|
|
||||||
|
foodnetwork|
|
||||||
|
slice|
|
||||||
|
history|
|
||||||
|
showcase|
|
||||||
|
bigbrothercanada|
|
||||||
|
abcspark|
|
||||||
|
disney(?:channel|lachaine)
|
||||||
|
)\.ca
|
||||||
|
)
|
||||||
|
/(?:[^/]+/)*
|
||||||
|
(?:
|
||||||
|
video\.html\?.*?\bv=|
|
||||||
|
videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
|
||||||
|
)
|
||||||
|
(?P<id>
|
||||||
|
[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
|
||||||
|
(?:[A-Z]{4})?\d{12,20}
|
||||||
)
|
)
|
||||||
/(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
|
|
||||||
(?P<id>\d+)
|
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '870923331648',
|
'id': '870923331648',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Movie Night Popcorn with Bryan',
|
'title': 'Movie Night Popcorn with Bryan',
|
||||||
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
|
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
|
||||||
'uploader': 'SHWM-NEW',
|
|
||||||
'upload_date': '20170206',
|
'upload_date': '20170206',
|
||||||
'timestamp': 1486392197,
|
'timestamp': 1486392197,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to parse JSON'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -48,58 +78,83 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
|
'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
_GEO_BYPASS = False
|
||||||
_TP_FEEDS = {
|
_SITE_MAP = {
|
||||||
'globaltv': {
|
'globaltv': 'series',
|
||||||
'feed_id': 'ChQqrem0lNUp',
|
'etcanada': 'series',
|
||||||
'account_id': 2269680845,
|
'foodnetwork': 'food',
|
||||||
},
|
'bigbrothercanada': 'series',
|
||||||
'etcanada': {
|
'disneychannel': 'disneyen',
|
||||||
'feed_id': 'ChQqrem0lNUp',
|
'disneylachaine': 'disneyfr',
|
||||||
'account_id': 2269680845,
|
|
||||||
},
|
|
||||||
'hgtv': {
|
|
||||||
'feed_id': 'L0BMHXi2no43',
|
|
||||||
'account_id': 2414428465,
|
|
||||||
},
|
|
||||||
'foodnetwork': {
|
|
||||||
'feed_id': 'ukK8o58zbRmJ',
|
|
||||||
'account_id': 2414429569,
|
|
||||||
},
|
|
||||||
'slice': {
|
|
||||||
'feed_id': '5tUJLgV2YNJ5',
|
|
||||||
'account_id': 2414427935,
|
|
||||||
},
|
|
||||||
'history': {
|
|
||||||
'feed_id': 'tQFx_TyyEq4J',
|
|
||||||
'account_id': 2369613659,
|
|
||||||
},
|
|
||||||
'showcase': {
|
|
||||||
'feed_id': '9H6qyshBZU3E',
|
|
||||||
'account_id': 2414426607,
|
|
||||||
},
|
|
||||||
'bigbrothercanada': {
|
|
||||||
'feed_id': 'ChQqrem0lNUp',
|
|
||||||
'account_id': 2269680845,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
feed_info = self._TP_FEEDS[domain.split('.')[0]]
|
site = domain.split('.')[0]
|
||||||
return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
|
path = self._SITE_MAP.get(site, site)
|
||||||
'episode_number': int_or_none(e.get('pl1$episode')),
|
if path != 'series':
|
||||||
'season_number': int_or_none(e.get('pl1$season')),
|
path = 'migration/' + path
|
||||||
'series': e.get('pl1$show'),
|
video = self._download_json(
|
||||||
}, {
|
'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
|
||||||
'HLS': {
|
video_id, query={'byId': video_id},
|
||||||
'manifest': 'm3u',
|
headers={'Accept': 'application/json'})[0]
|
||||||
},
|
title = video['title']
|
||||||
'DesktopHLS Default': {
|
|
||||||
'manifest': 'm3u',
|
formats = []
|
||||||
},
|
for source in video.get('sources', []):
|
||||||
'MP4 MBR': {
|
smil_url = source.get('file')
|
||||||
'manifest': 'm3u',
|
if not smil_url:
|
||||||
},
|
continue
|
||||||
}, feed_info['account_id'])
|
source_type = source.get('type')
|
||||||
|
note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
|
||||||
|
resp = self._download_webpage(
|
||||||
|
smil_url, video_id, note, fatal=False,
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
if not resp:
|
||||||
|
continue
|
||||||
|
error = self._parse_json(resp, video_id, fatal=False)
|
||||||
|
if error:
|
||||||
|
if error.get('exception') == 'GeoLocationBlocked':
|
||||||
|
self.raise_geo_restricted(countries=['CA'])
|
||||||
|
raise ExtractorError(error['description'])
|
||||||
|
smil = self._parse_xml(resp, video_id, fatal=False)
|
||||||
|
if smil is None:
|
||||||
|
continue
|
||||||
|
namespace = self._parse_smil_namespace(smil)
|
||||||
|
formats.extend(self._parse_smil_formats(
|
||||||
|
smil, smil_url, video_id, namespace))
|
||||||
|
if not formats and video.get('drm'):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for track in video.get('tracks', []):
|
||||||
|
track_url = track.get('file')
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
|
||||||
|
subtitles.setdefault(lang, []).append({'url': track_url})
|
||||||
|
|
||||||
|
metadata = video.get('metadata') or {}
|
||||||
|
get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
|
||||||
|
'description': video.get('description'),
|
||||||
|
'timestamp': int_or_none(video.get('availableDate'), 1000),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': float_or_none(metadata.get('duration')),
|
||||||
|
'series': dict_get(video, ('show', 'pl1$show')),
|
||||||
|
'season_number': get_number('season'),
|
||||||
|
'episode_number': get_number('episode'),
|
||||||
|
}
|
||||||
|
@ -13,6 +13,7 @@ from ..compat import (
|
|||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@ -25,9 +26,9 @@ from ..utils import (
|
|||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
merge_dicts,
|
||||||
remove_end,
|
remove_end,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unified_strdate,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
@ -136,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
# rtmp
|
# rtmp
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -157,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '702409',
|
'id': '702409',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
'title': compat_str,
|
||||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
'description': compat_str,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'TV TOKYO',
|
'uploader': 'Re:Zero Partners',
|
||||||
'upload_date': '20160508',
|
'timestamp': 1462098900,
|
||||||
|
'upload_date': '20160501',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -172,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '727589',
|
'id': '727589',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
'title': compat_str,
|
||||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
'description': compat_str,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Kadokawa Pictures Inc.',
|
'uploader': 'Kadokawa Pictures Inc.',
|
||||||
'upload_date': '20170118',
|
'timestamp': 1484130900,
|
||||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
'upload_date': '20170111',
|
||||||
|
'series': compat_str,
|
||||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||||
'season_number': 2,
|
'season_number': 2,
|
||||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||||
@ -200,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '535080',
|
'id': '535080',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
'title': compat_str,
|
||||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
'description': compat_str,
|
||||||
'uploader': 'Marvelous AQL Inc.',
|
'uploader': 'Marvelous AQL Inc.',
|
||||||
'upload_date': '20091021',
|
'timestamp': 1255512600,
|
||||||
|
'upload_date': '20091014',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# Just test metadata extraction
|
# Just test metadata extraction
|
||||||
@ -224,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
# just test metadata extraction
|
# just test metadata extraction
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}, {
|
}, {
|
||||||
# A video with a vastly different season name compared to the series name
|
# A video with a vastly different season name compared to the series name
|
||||||
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '590532',
|
'id': '590532',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
|
'title': compat_str,
|
||||||
'description': 'Mahiro and Nyaruko talk about official certification.',
|
'description': compat_str,
|
||||||
'uploader': 'TV TOKYO',
|
'uploader': 'TV TOKYO',
|
||||||
|
'timestamp': 1330956000,
|
||||||
'upload_date': '20120305',
|
'upload_date': '20120305',
|
||||||
'series': 'Nyarko-san: Another Crawling Chaos',
|
'series': 'Nyarko-san: Another Crawling Chaos',
|
||||||
'season': 'Haiyoru! Nyaruani (ONA)',
|
'season': 'Haiyoru! Nyaruani (ONA)',
|
||||||
@ -442,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
webpage, 'language', default=None, group='lang')
|
webpage, 'language', default=None, group='lang')
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
(r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
|
||||||
webpage, 'video_title')
|
r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
|
||||||
|
webpage, 'video_title', default=None)
|
||||||
|
if not video_title:
|
||||||
|
video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
|
||||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||||
video_description = (self._parse_json(self._html_search_regex(
|
video_description = (self._parse_json(self._html_search_regex(
|
||||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||||
if video_description:
|
if video_description:
|
||||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||||
video_upload_date = self._html_search_regex(
|
|
||||||
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
|
||||||
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
|
||||||
if video_upload_date:
|
|
||||||
video_upload_date = unified_strdate(video_upload_date)
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
# try looking for both an uploader that's a link and one that's not
|
# try looking for both an uploader that's a link and one that's not
|
||||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||||
webpage, 'video_uploader', fatal=False)
|
webpage, 'video_uploader', default=False)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for stream in media.get('streams', []):
|
for stream in media.get('streams', []):
|
||||||
@ -611,14 +616,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
webpage, 'season number', default=None))
|
webpage, 'season number', default=None))
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
|
||||||
'series': series,
|
'series': series,
|
||||||
'season': season,
|
'season': season,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
@ -626,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_timestamp
|
from ..utils import unified_timestamp
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class CtsNewsIE(InfoExtractor):
|
class CtsNewsIE(InfoExtractor):
|
||||||
@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '201501291578109',
|
'id': '201501291578109',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '以色列.真主黨交火 3人死亡',
|
'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
|
||||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
|
||||||
'timestamp': 1422528540,
|
'timestamp': 1422528540,
|
||||||
'upload_date': '20150129',
|
'upload_date': '20150129',
|
||||||
}
|
}
|
||||||
@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '201309031304098',
|
'id': '201309031304098',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
|
||||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1378205880,
|
'timestamp': 1378205880,
|
||||||
@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor):
|
|||||||
video_url = mp4_feed['source_url']
|
video_url = mp4_feed['source_url']
|
||||||
else:
|
else:
|
||||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||||
youtube_url = self._search_regex(
|
youtube_url = YoutubeIE._extract_url(page)
|
||||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
|
||||||
|
|
||||||
return self.url_result(youtube_url, ie='Youtube')
|
return self.url_result(youtube_url, ie='Youtube')
|
||||||
|
|
||||||
|
@ -1,64 +1,105 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
|
||||||
import functools
|
import functools
|
||||||
import hashlib
|
|
||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import random
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_struct_pack
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
age_restricted,
|
||||||
error_to_compat_str,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
parse_iso8601,
|
|
||||||
sanitized_Request,
|
|
||||||
str_to_int,
|
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
|
||||||
url_or_none,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DailymotionBaseInfoExtractor(InfoExtractor):
|
class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||||
|
_FAMILY_FILTER = None
|
||||||
|
_HEADERS = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Origin': 'https://www.dailymotion.com',
|
||||||
|
}
|
||||||
|
_NETRC_MACHINE = 'dailymotion'
|
||||||
|
|
||||||
|
def _get_dailymotion_cookies(self):
|
||||||
|
return self._get_cookies('https://www.dailymotion.com/')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_request(url):
|
def _get_cookie_value(cookies, name):
|
||||||
"""Build a request with the family filter disabled"""
|
cookie = cookies.get(name)
|
||||||
request = sanitized_Request(url)
|
if cookie:
|
||||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
return cookie.value
|
||||||
return request
|
|
||||||
|
|
||||||
def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
|
def _set_dailymotion_cookie(self, name, value):
|
||||||
request = self._build_request(url)
|
self._set_cookie('www.dailymotion.com', name, value)
|
||||||
return self._download_webpage_handle(request, *args, **kwargs)
|
|
||||||
|
|
||||||
def _download_webpage_no_ff(self, url, *args, **kwargs):
|
def _real_initialize(self):
|
||||||
request = self._build_request(url)
|
cookies = self._get_dailymotion_cookies()
|
||||||
return self._download_webpage(request, *args, **kwargs)
|
ff = self._get_cookie_value(cookies, 'ff')
|
||||||
|
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
|
||||||
|
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
|
||||||
|
|
||||||
|
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
|
||||||
|
if not self._HEADERS.get('Authorization'):
|
||||||
|
cookies = self._get_dailymotion_cookies()
|
||||||
|
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
|
||||||
|
if not token:
|
||||||
|
data = {
|
||||||
|
'client_id': 'f1a362d288c1b98099c7',
|
||||||
|
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
|
||||||
|
}
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if username:
|
||||||
|
data.update({
|
||||||
|
'grant_type': 'password',
|
||||||
|
'password': password,
|
||||||
|
'username': username,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
data['grant_type'] = 'client_credentials'
|
||||||
|
try:
|
||||||
|
token = self._download_json(
|
||||||
|
'https://graphql.api.dailymotion.com/oauth/token',
|
||||||
|
None, 'Downloading Access Token',
|
||||||
|
data=urlencode_postdata(data))['access_token']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||||
|
raise ExtractorError(self._parse_json(
|
||||||
|
e.cause.read().decode(), xid)['error_description'], expected=True)
|
||||||
|
raise
|
||||||
|
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||||
|
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||||
|
|
||||||
|
resp = self._download_json(
|
||||||
|
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
|
||||||
|
'query': '''{
|
||||||
|
%s(xid: "%s"%s) {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
|
||||||
|
}).encode(), headers=self._HEADERS)
|
||||||
|
obj = resp['data'][object_type]
|
||||||
|
if not obj:
|
||||||
|
raise ExtractorError(resp['errors'][0]['message'], expected=True)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
|
_VALID_URL = r'''(?ix)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
|
||||||
|
(?:www\.)?lequipe\.fr/video
|
||||||
|
)
|
||||||
|
/(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||||
|
'''
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
|
|
||||||
_FORMATS = [
|
|
||||||
('stream_h264_ld_url', 'ld'),
|
|
||||||
('stream_h264_url', 'standard'),
|
|
||||||
('stream_h264_hq_url', 'hq'),
|
|
||||||
('stream_h264_hd_url', 'hd'),
|
|
||||||
('stream_h264_hd1080_url', 'hd180'),
|
|
||||||
]
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||||
@ -67,7 +108,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
|
||||||
'duration': 187,
|
'duration': 187,
|
||||||
'timestamp': 1493651285,
|
'timestamp': 1493651285,
|
||||||
'upload_date': '20170501',
|
'upload_date': '20170501',
|
||||||
@ -133,7 +173,22 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.lequipe.fr/video/x791mem',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_COMMON_MEDIA_FIELDS = '''description
|
||||||
|
geoblockedCountries {
|
||||||
|
allowed
|
||||||
|
}
|
||||||
|
xid'''
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
@ -149,264 +204,140 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage_no_ff(
|
if playlist_id:
|
||||||
'https://www.dailymotion.com/video/%s' % video_id, video_id)
|
if not self._downloader.params.get('noplaylist'):
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||||
|
return self.url_result(
|
||||||
|
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||||
|
'DailymotionPlaylist', playlist_id)
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
password = self._downloader.params.get('videopassword')
|
||||||
|
media = self._call_api(
|
||||||
description = self._og_search_description(
|
'media', video_id, '''... on Video {
|
||||||
webpage, default=None) or self._html_search_meta(
|
%s
|
||||||
'description', webpage, 'description')
|
stats {
|
||||||
|
likes {
|
||||||
view_count_str = self._search_regex(
|
total
|
||||||
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
|
|
||||||
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
|
|
||||||
webpage, 'view count', default=None)
|
|
||||||
if view_count_str:
|
|
||||||
view_count_str = re.sub(r'\s', '', view_count_str)
|
|
||||||
view_count = str_to_int(view_count_str)
|
|
||||||
comment_count = int_or_none(self._search_regex(
|
|
||||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
|
||||||
webpage, 'comment count', default=None))
|
|
||||||
|
|
||||||
player_v5 = self._search_regex(
|
|
||||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826
|
|
||||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
|
||||||
r'buildPlayer\(({.+?})\);',
|
|
||||||
r'var\s+config\s*=\s*({.+?});',
|
|
||||||
# New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580)
|
|
||||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
|
||||||
webpage, 'player v5', default=None)
|
|
||||||
if player_v5:
|
|
||||||
player = self._parse_json(player_v5, video_id, fatal=False) or {}
|
|
||||||
metadata = try_get(player, lambda x: x['metadata'], dict)
|
|
||||||
if not metadata:
|
|
||||||
metadata_url = url_or_none(try_get(
|
|
||||||
player, lambda x: x['context']['metadata_template_url1']))
|
|
||||||
if metadata_url:
|
|
||||||
metadata_url = metadata_url.replace(':videoId', video_id)
|
|
||||||
else:
|
|
||||||
metadata_url = update_url_query(
|
|
||||||
'https://www.dailymotion.com/player/metadata/video/%s'
|
|
||||||
% video_id, {
|
|
||||||
'embedder': url,
|
|
||||||
'integration': 'inline',
|
|
||||||
'GK_PV5_NEON': '1',
|
|
||||||
})
|
|
||||||
metadata = self._download_json(
|
|
||||||
metadata_url, video_id, 'Downloading metadata JSON')
|
|
||||||
|
|
||||||
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
|
|
||||||
password = self._downloader.params.get('videopassword')
|
|
||||||
if password:
|
|
||||||
r = int(metadata['id'][1:], 36)
|
|
||||||
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
|
|
||||||
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
|
|
||||||
n = us64e(compat_struct_pack('I', r))
|
|
||||||
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
|
|
||||||
metadata = self._download_json(
|
|
||||||
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
|
|
||||||
|
|
||||||
self._check_error(metadata)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for quality, media_list in metadata['qualities'].items():
|
|
||||||
for media in media_list:
|
|
||||||
media_url = media.get('url')
|
|
||||||
if not media_url:
|
|
||||||
continue
|
|
||||||
type_ = media.get('type')
|
|
||||||
if type_ == 'application/vnd.lumberjack.manifest':
|
|
||||||
continue
|
|
||||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
media_url, video_id, 'mp4', preference=-1,
|
|
||||||
m3u8_id='hls', fatal=False)
|
|
||||||
for f in m3u8_formats:
|
|
||||||
f['url'] = f['url'].split('#')[0]
|
|
||||||
formats.append(f)
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
f = {
|
|
||||||
'url': media_url,
|
|
||||||
'format_id': 'http-%s' % quality,
|
|
||||||
'ext': ext,
|
|
||||||
}
|
|
||||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
|
||||||
if m:
|
|
||||||
f.update({
|
|
||||||
'width': int(m.group('width')),
|
|
||||||
'height': int(m.group('height')),
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = metadata['title']
|
|
||||||
duration = int_or_none(metadata.get('duration'))
|
|
||||||
timestamp = int_or_none(metadata.get('created_time'))
|
|
||||||
thumbnail = metadata.get('poster_url')
|
|
||||||
uploader = metadata.get('owner', {}).get('screenname')
|
|
||||||
uploader_id = metadata.get('owner', {}).get('id')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
subtitles_data = metadata.get('subtitles', {}).get('data', {})
|
|
||||||
if subtitles_data and isinstance(subtitles_data, dict):
|
|
||||||
for subtitle_lang, subtitle in subtitles_data.items():
|
|
||||||
subtitles[subtitle_lang] = [{
|
|
||||||
'ext': determine_ext(subtitle_url),
|
|
||||||
'url': subtitle_url,
|
|
||||||
} for subtitle_url in subtitle.get('urls', [])]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'age_limit': age_limit,
|
|
||||||
'view_count': view_count,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
# vevo embed
|
|
||||||
vevo_id = self._search_regex(
|
|
||||||
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
|
|
||||||
webpage, 'vevo embed', default=None)
|
|
||||||
if vevo_id:
|
|
||||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
|
||||||
|
|
||||||
# fallback old player
|
|
||||||
embed_page = self._download_webpage_no_ff(
|
|
||||||
'https://www.dailymotion.com/embed/video/%s' % video_id,
|
|
||||||
video_id, 'Downloading embed page')
|
|
||||||
|
|
||||||
timestamp = parse_iso8601(self._html_search_meta(
|
|
||||||
'video:release_date', webpage, 'upload date'))
|
|
||||||
|
|
||||||
info = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'var info = ({.*?}),$', embed_page,
|
|
||||||
'video info', flags=re.MULTILINE),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
self._check_error(info)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for (key, format_id) in self._FORMATS:
|
|
||||||
video_url = info.get(key)
|
|
||||||
if video_url is not None:
|
|
||||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
|
||||||
if m_size is not None:
|
|
||||||
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
|
||||||
else:
|
|
||||||
width, height = None, None
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': format_id,
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
# subtitles
|
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None)
|
|
||||||
if title is None:
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
|
|
||||||
'title')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'uploader': info['owner.screenname'],
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'subtitles': video_subtitles,
|
|
||||||
'thumbnail': info['thumbnail_url'],
|
|
||||||
'age_limit': age_limit,
|
|
||||||
'view_count': view_count,
|
|
||||||
'duration': info['duration']
|
|
||||||
}
|
}
|
||||||
|
views {
|
||||||
|
total
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
... on Live {
|
||||||
|
%s
|
||||||
|
audienceCount
|
||||||
|
isOnAir
|
||||||
|
}''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
|
||||||
|
'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
|
||||||
|
xid = media['xid']
|
||||||
|
|
||||||
def _check_error(self, info):
|
metadata = self._download_json(
|
||||||
error = info.get('error')
|
'https://www.dailymotion.com/player/metadata/video/' + xid,
|
||||||
|
xid, 'Downloading metadata JSON',
|
||||||
|
query={'app': 'com.dailymotion.neon'})
|
||||||
|
|
||||||
|
error = metadata.get('error')
|
||||||
if error:
|
if error:
|
||||||
title = error.get('title') or error['message']
|
title = error.get('title') or error['raw_message']
|
||||||
# See https://developer.dailymotion.com/api#access-error
|
# See https://developer.dailymotion.com/api#access-error
|
||||||
if error.get('code') == 'DM007':
|
if error.get('code') == 'DM007':
|
||||||
self.raise_geo_restricted(msg=title)
|
allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
|
||||||
|
self.raise_geo_restricted(msg=title, countries=allowed_countries)
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, title), expected=True)
|
'%s said: %s' % (self.IE_NAME, title), expected=True)
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
title = metadata['title']
|
||||||
try:
|
is_live = media.get('isOnAir')
|
||||||
sub_list = self._download_webpage(
|
formats = []
|
||||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
for quality, media_list in metadata['qualities'].items():
|
||||||
video_id, note=False)
|
for m in media_list:
|
||||||
except ExtractorError as err:
|
media_url = m.get('url')
|
||||||
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
|
media_type = m.get('type')
|
||||||
return {}
|
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||||
info = json.loads(sub_list)
|
continue
|
||||||
if (info['total'] > 0):
|
if media_type == 'application/x-mpegURL':
|
||||||
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
|
formats.extend(self._extract_m3u8_formats(
|
||||||
return sub_lang_list
|
media_url, video_id, 'mp4',
|
||||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
'm3u8' if is_live else 'm3u8_native',
|
||||||
return {}
|
m3u8_id='hls', fatal=False))
|
||||||
|
else:
|
||||||
|
f = {
|
||||||
|
'url': media_url,
|
||||||
|
'format_id': 'http-' + quality,
|
||||||
|
}
|
||||||
|
m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
|
||||||
|
if m:
|
||||||
|
width, height, fps = map(int_or_none, m.groups())
|
||||||
|
f.update({
|
||||||
|
'fps': fps,
|
||||||
|
'height': height,
|
||||||
|
'width': width,
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
for f in formats:
|
||||||
|
f['url'] = f['url'].split('#')[0]
|
||||||
|
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||||
|
f['fps'] = 60
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||||
|
for subtitle_lang, subtitle in subtitles_data.items():
|
||||||
|
subtitles[subtitle_lang] = [{
|
||||||
|
'url': subtitle_url,
|
||||||
|
} for subtitle_url in subtitle.get('urls', [])]
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for height, poster_url in metadata.get('posters', {}).items():
|
||||||
|
thumbnails.append({
|
||||||
|
'height': int_or_none(height),
|
||||||
|
'id': height,
|
||||||
|
'url': poster_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
owner = metadata.get('owner') or {}
|
||||||
|
stats = media.get('stats') or {}
|
||||||
|
get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'description': clean_html(media.get('description')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'duration': int_or_none(metadata.get('duration')) or None,
|
||||||
|
'timestamp': int_or_none(metadata.get('created_time')),
|
||||||
|
'uploader': owner.get('screenname'),
|
||||||
|
'uploader_id': owner.get('id') or metadata.get('screenname'),
|
||||||
|
'age_limit': 18 if metadata.get('explicit') else 0,
|
||||||
|
'tags': metadata.get('tags'),
|
||||||
|
'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
|
||||||
|
'like_count': get_count('like'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
|
||||||
IE_NAME = 'dailymotion:playlist'
|
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'SPORT',
|
|
||||||
'id': 'xv4bw',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 20,
|
|
||||||
}]
|
|
||||||
_PAGE_SIZE = 100
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
def _fetch_page(self, playlist_id, authorizaion, page):
|
def _fetch_page(self, playlist_id, page):
|
||||||
page += 1
|
page += 1
|
||||||
videos = self._download_json(
|
videos = self._call_api(
|
||||||
'https://graphql.api.dailymotion.com',
|
self._OBJECT_TYPE, playlist_id,
|
||||||
playlist_id, 'Downloading page %d' % page,
|
'''videos(allowExplicit: %s, first: %d, page: %d) {
|
||||||
data=json.dumps({
|
|
||||||
'query': '''{
|
|
||||||
collection(xid: "%s") {
|
|
||||||
videos(first: %d, page: %d) {
|
|
||||||
pageInfo {
|
|
||||||
hasNextPage
|
|
||||||
nextPage
|
|
||||||
}
|
|
||||||
edges {
|
edges {
|
||||||
node {
|
node {
|
||||||
xid
|
xid
|
||||||
url
|
url
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
|
||||||
}
|
'Downloading page %d' % page)['videos']
|
||||||
}''' % (playlist_id, self._PAGE_SIZE, page)
|
|
||||||
}).encode(), headers={
|
|
||||||
'Authorization': authorizaion,
|
|
||||||
'Origin': 'https://www.dailymotion.com',
|
|
||||||
})['data']['collection']['videos']
|
|
||||||
for edge in videos['edges']:
|
for edge in videos['edges']:
|
||||||
node = edge['node']
|
node = edge['node']
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
@ -414,86 +345,49 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
api = self._parse_json(self._search_regex(
|
|
||||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
|
||||||
webpage, 'player config'), playlist_id)['context']['api']
|
|
||||||
auth = self._download_json(
|
|
||||||
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
|
||||||
playlist_id, data=urlencode_postdata({
|
|
||||||
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
|
||||||
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
|
||||||
'grant_type': 'client_credentials',
|
|
||||||
}))
|
|
||||||
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
|
||||||
entries = OnDemandPagedList(functools.partial(
|
entries = OnDemandPagedList(functools.partial(
|
||||||
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
self._fetch_page, playlist_id), self._PAGE_SIZE)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id,
|
entries, playlist_id)
|
||||||
self._og_search_title(webpage))
|
|
||||||
|
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
|
||||||
|
IE_NAME = 'dailymotion:playlist'
|
||||||
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'xv4bw',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 20,
|
||||||
|
}]
|
||||||
|
_OBJECT_TYPE = 'collection'
|
||||||
|
|
||||||
|
|
||||||
|
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||||
IE_NAME = 'dailymotion:user'
|
IE_NAME = 'dailymotion:user'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
|
||||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nqtv',
|
'id': 'nqtv',
|
||||||
'title': 'Rémi Gaillard',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 100,
|
'playlist_mincount': 152,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UnderProject',
|
'id': 'UnderProject',
|
||||||
'title': 'UnderProject',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 1800,
|
'playlist_mincount': 1000,
|
||||||
'expected_warnings': [
|
|
||||||
'Stopped at duplicated page',
|
|
||||||
],
|
|
||||||
'skip': 'Takes too long time',
|
'skip': 'Takes too long time',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nqtv',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 148,
|
||||||
|
'params': {
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
_OBJECT_TYPE = 'channel'
|
||||||
def _extract_entries(self, id):
|
|
||||||
video_ids = set()
|
|
||||||
processed_urls = set()
|
|
||||||
for pagenum in itertools.count(1):
|
|
||||||
page_url = self._PAGE_TEMPLATE % (id, pagenum)
|
|
||||||
webpage, urlh = self._download_webpage_handle_no_ff(
|
|
||||||
page_url, id, 'Downloading page %s' % pagenum)
|
|
||||||
if urlh.geturl() in processed_urls:
|
|
||||||
self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
|
|
||||||
page_url, urlh.geturl()), id)
|
|
||||||
break
|
|
||||||
|
|
||||||
processed_urls.add(urlh.geturl())
|
|
||||||
|
|
||||||
for video_id in re.findall(r'data-xid="(.+?)"', webpage):
|
|
||||||
if video_id not in video_ids:
|
|
||||||
yield self.url_result(
|
|
||||||
'http://www.dailymotion.com/video/%s' % video_id,
|
|
||||||
DailymotionIE.ie_key(), video_id)
|
|
||||||
video_ids.add(video_id)
|
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
|
||||||
break
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
user = mobj.group('user')
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'https://www.dailymotion.com/user/%s' % user, user)
|
|
||||||
full_user = unescapeHTML(self._html_search_regex(
|
|
||||||
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
|
|
||||||
webpage, 'user'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': user,
|
|
||||||
'title': full_user,
|
|
||||||
'entries': self._extract_entries(user),
|
|
||||||
}
|
|
||||||
|
@ -1,154 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..aes import (
|
|
||||||
aes_cbc_decrypt,
|
|
||||||
aes_cbc_encrypt,
|
|
||||||
)
|
|
||||||
from ..compat import compat_b64decode
|
|
||||||
from ..utils import (
|
|
||||||
bytes_to_intlist,
|
|
||||||
bytes_to_long,
|
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
|
||||||
intlist_to_bytes,
|
|
||||||
js_to_json,
|
|
||||||
int_or_none,
|
|
||||||
long_to_bytes,
|
|
||||||
pkcs1pad,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DaisukiMottoIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'V2e',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
|
|
||||||
'subtitles': {
|
|
||||||
'mul': [{
|
|
||||||
'ext': 'ttml',
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # AES-encrypted HLS stream
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
# The public key in PEM format can be found in clientlibs_anime_watch.min.js
|
|
||||||
_RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
flashvars = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
|
|
||||||
video_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
iv = [0] * 16
|
|
||||||
|
|
||||||
data = {}
|
|
||||||
for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
|
|
||||||
data[key] = flashvars.get(key, '')
|
|
||||||
|
|
||||||
encrypted_rtn = None
|
|
||||||
|
|
||||||
# Some AES keys are rejected. Try it with different AES keys
|
|
||||||
for idx in range(5):
|
|
||||||
aes_key = [random.randint(0, 254) for _ in range(32)]
|
|
||||||
padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
|
|
||||||
|
|
||||||
n, e = self._RSA_KEY
|
|
||||||
encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
|
|
||||||
init_data = self._download_json(
|
|
||||||
'http://motto.daisuki.net/fastAPI/bgn/init/',
|
|
||||||
video_id, query={
|
|
||||||
's': flashvars.get('s', ''),
|
|
||||||
'c': flashvars.get('ss3_prm', ''),
|
|
||||||
'e': url,
|
|
||||||
'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
|
|
||||||
bytes_to_intlist(json.dumps(data)),
|
|
||||||
aes_key, iv))).decode('ascii'),
|
|
||||||
'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
|
|
||||||
}, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
|
|
||||||
|
|
||||||
if 'rtn' in init_data:
|
|
||||||
encrypted_rtn = init_data['rtn']
|
|
||||||
break
|
|
||||||
|
|
||||||
self._sleep(5, video_id)
|
|
||||||
|
|
||||||
if encrypted_rtn is None:
|
|
||||||
raise ExtractorError('Failed to fetch init data')
|
|
||||||
|
|
||||||
rtn = self._parse_json(
|
|
||||||
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
|
|
||||||
compat_b64decode(encrypted_rtn)),
|
|
||||||
aes_key, iv)).decode('utf-8').rstrip('\0'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
title = rtn['title_str']
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
caption_url = rtn.get('caption_url')
|
|
||||||
if caption_url:
|
|
||||||
# mul: multiple languages
|
|
||||||
subtitles['mul'] = [{
|
|
||||||
'url': caption_url,
|
|
||||||
'ext': 'ttml',
|
|
||||||
}]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DaisukiMottoPlaylistIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://motto.daisuki.net/information/',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'DRAGON BALL SUPER',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 117,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
|
|
||||||
attr = extract_attributes(li)
|
|
||||||
ad_id = attr.get('data-ad_id')
|
|
||||||
product_id = attr.get('data-product_id')
|
|
||||||
if ad_id and product_id:
|
|
||||||
episode_id = attr.get('data-chapter')
|
|
||||||
entries.append({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
|
|
||||||
'episode_id': episode_id,
|
|
||||||
'episode_number': int_or_none(episode_id),
|
|
||||||
'ie_key': 'DaisukiMotto',
|
|
||||||
})
|
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')
|
|
@ -2,25 +2,21 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
str_to_int,
|
|
||||||
xpath_text,
|
|
||||||
unescapeHTML,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumBaseIE(InfoExtractor):
|
||||||
|
_KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
|
||||||
|
|
||||||
|
|
||||||
|
class DaumIE(DaumBaseIE):
|
||||||
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||||
IE_NAME = 'daum.net'
|
IE_NAME = 'daum.net'
|
||||||
|
|
||||||
@ -36,6 +32,9 @@ class DaumIE(InfoExtractor):
|
|||||||
'duration': 2117,
|
'duration': 2117,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'uploader_id': 186139,
|
||||||
|
'uploader': '콘간지',
|
||||||
|
'timestamp': 1387310323,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://m.tvpot.daum.net/v/65139429',
|
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||||
@ -44,11 +43,14 @@ class DaumIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||||
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||||
'upload_date': '20150604',
|
'upload_date': '20150118',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 154,
|
'duration': 154,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'uploader': 'MBC 예능',
|
||||||
|
'uploader_id': 132251,
|
||||||
|
'timestamp': 1421604228,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||||
@ -59,12 +61,15 @@ class DaumIE(InfoExtractor):
|
|||||||
'id': 'vwIpVpCQsT8$',
|
'id': 'vwIpVpCQsT8$',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': '01-Korean War ( Trouble on the horizon )',
|
'title': '01-Korean War ( Trouble on the horizon )',
|
||||||
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
|
||||||
'upload_date': '20080223',
|
'upload_date': '20080223',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 249,
|
'duration': 249,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'uploader': '까칠한 墮落始祖 황비홍님의',
|
||||||
|
'uploader_id': 560824,
|
||||||
|
'timestamp': 1203770745,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Requires dte_type=WEB (#9972)
|
# Requires dte_type=WEB (#9972)
|
||||||
@ -73,60 +78,24 @@ class DaumIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
|
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
|
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||||
'upload_date': '20160611',
|
'upload_date': '20170129',
|
||||||
|
'uploader': '쇼! 음악중심',
|
||||||
|
'uploader_id': 2653210,
|
||||||
|
'timestamp': 1485684628,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||||
movie_data = self._download_json(
|
if not video_id.isdigit():
|
||||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
|
video_id += '@my'
|
||||||
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
|
return self.url_result(
|
||||||
|
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
|
||||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
|
||||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
|
||||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
|
||||||
|
|
||||||
info = self._download_xml(
|
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
|
|
||||||
'Downloading video info', query={'vid': video_id})
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_el in movie_data['output_list']['output_list']:
|
|
||||||
profile = format_el['profile']
|
|
||||||
format_query = compat_urllib_parse_urlencode({
|
|
||||||
'vid': video_id,
|
|
||||||
'profile': profile,
|
|
||||||
})
|
|
||||||
url_doc = self._download_xml(
|
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
|
||||||
video_id, note='Downloading video data for %s format' % profile)
|
|
||||||
format_url = url_doc.find('result/url').text
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': profile,
|
|
||||||
'width': int_or_none(format_el.get('width')),
|
|
||||||
'height': int_or_none(format_el.get('height')),
|
|
||||||
'filesize': int_or_none(format_el.get('filesize')),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info.find('TITLE').text,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': xpath_text(info, 'THUMB_URL'),
|
|
||||||
'description': xpath_text(info, 'CONTENTS'),
|
|
||||||
'duration': int_or_none(xpath_text(info, 'DURATION')),
|
|
||||||
'upload_date': info.find('REGDTTM').text[:8],
|
|
||||||
'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
|
|
||||||
'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DaumClipIE(InfoExtractor):
|
class DaumClipIE(DaumBaseIE):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = 'daum.net:clip'
|
IE_NAME = 'daum.net:clip'
|
||||||
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||||
@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 3868,
|
'duration': 3868,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'uploader': 'GOMeXP',
|
||||||
|
'uploader_id': 6667,
|
||||||
|
'timestamp': 1377911092,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||||
@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
clip_info = self._download_json(
|
return self.url_result(
|
||||||
'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
|
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
|
||||||
video_id, 'Downloading clip info')['clip_bean']
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
|
||||||
'title': unescapeHTML(clip_info['title']),
|
|
||||||
'thumbnail': clip_info.get('thumb_url'),
|
|
||||||
'description': clip_info.get('contents'),
|
|
||||||
'duration': int_or_none(clip_info.get('duration')),
|
|
||||||
'upload_date': clip_info.get('up_date')[:8],
|
|
||||||
'view_count': int_or_none(clip_info.get('play_count')),
|
|
||||||
'ie_key': 'Daum',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DaumListIE(InfoExtractor):
|
class DaumListIE(InfoExtractor):
|
||||||
|
@ -7,50 +7,51 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class DBTVIE(InfoExtractor):
|
class DBTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
_VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
|
||||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
'md5': 'b8f850ba1860adbda668d367f9b77699',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3649835190001',
|
'id': 'PynxJnNWChE',
|
||||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'timestamp': 1404039863,
|
'upload_date': '20160916',
|
||||||
'upload_date': '20140629',
|
'duration': 69,
|
||||||
'duration': 69.544,
|
'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
|
||||||
'uploader_id': '1027729757001',
|
'uploader': 'Dagbladet',
|
||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveNew']
|
'add_ie': ['Youtube']
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://dbtv.no/3649835190001',
|
'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://dbtv.no/vice/5000634109001',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [url for _, url in re.findall(
|
return [url for _, url in re.findall(
|
||||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
|
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
info = {
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'ie_key': 'BrightcoveNew',
|
|
||||||
}
|
}
|
||||||
|
if len(video_id) == 11:
|
||||||
|
info.update({
|
||||||
|
'url': video_id,
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
info.update({
|
||||||
|
'url': 'jwplatform:' + video_id,
|
||||||
|
'ie_key': 'JWPlatform',
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -16,10 +16,11 @@ class DctpTvIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# 4x3
|
# 4x3
|
||||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||||
|
'md5': '3ffbd1556c3fe210724d7088fad723e3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||||
'ext': 'flv',
|
'ext': 'm4v',
|
||||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||||
'description': 'Kurzfilm',
|
'description': 'Kurzfilm',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
@ -27,10 +28,6 @@ class DctpTvIE(InfoExtractor):
|
|||||||
'timestamp': 1302172322,
|
'timestamp': 1302172322,
|
||||||
'upload_date': '20110407',
|
'upload_date': '20110407',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# 16x9
|
# 16x9
|
||||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||||
@ -59,33 +56,26 @@ class DctpTvIE(InfoExtractor):
|
|||||||
|
|
||||||
uuid = media['uuid']
|
uuid = media['uuid']
|
||||||
title = media['title']
|
title = media['title']
|
||||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
is_wide = media.get('is_wide')
|
||||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
formats = []
|
||||||
|
|
||||||
servers = self._download_json(
|
def add_formats(suffix):
|
||||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
|
||||||
note='Downloading server list JSON', fatal=False)
|
formats.extend([{
|
||||||
|
'format_id': 'hls-' + suffix,
|
||||||
|
'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
|
||||||
|
'protocol': 'm3u8_native',
|
||||||
|
}, {
|
||||||
|
'format_id': 's3-' + suffix,
|
||||||
|
'url': templ % 'completed-media.s3.amazonaws.com',
|
||||||
|
}, {
|
||||||
|
'format_id': 'http-' + suffix,
|
||||||
|
'url': templ % 'cdn-media.dctp.tv',
|
||||||
|
}])
|
||||||
|
|
||||||
if servers:
|
add_formats('0500_' + ('16x9' if is_wide else '4x3'))
|
||||||
endpoint = next(
|
if is_wide:
|
||||||
server['endpoint']
|
add_formats('720p')
|
||||||
for server in servers
|
|
||||||
if url_or_none(server.get('endpoint'))
|
|
||||||
and 'cloudfront' in server['endpoint'])
|
|
||||||
else:
|
|
||||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
|
||||||
|
|
||||||
app = self._search_regex(
|
|
||||||
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': endpoint,
|
|
||||||
'app': app,
|
|
||||||
'play_path': play_path,
|
|
||||||
'page_url': url,
|
|
||||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
|
||||||
'ext': 'flv',
|
|
||||||
}]
|
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
images = media.get('images')
|
images = media.get('images')
|
||||||
|
@ -5,31 +5,24 @@ import re
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
from .discoverygo import DiscoveryGoBaseIE
|
from .discoverygo import DiscoveryGoBaseIE
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse_unquote
|
||||||
compat_str,
|
from ..utils import ExtractorError
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
try_get,
|
|
||||||
)
|
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?P<site>
|
(?P<site>
|
||||||
(?:www\.)?
|
go\.discovery|
|
||||||
|
www\.
|
||||||
(?:
|
(?:
|
||||||
discovery|
|
|
||||||
investigationdiscovery|
|
investigationdiscovery|
|
||||||
discoverylife|
|
discoverylife|
|
||||||
animalplanet|
|
animalplanet|
|
||||||
ahctv|
|
ahctv|
|
||||||
destinationamerica|
|
destinationamerica|
|
||||||
sciencechannel|
|
sciencechannel|
|
||||||
tlc|
|
tlc
|
||||||
velocity
|
|
||||||
)|
|
)|
|
||||||
watch\.
|
watch\.
|
||||||
(?:
|
(?:
|
||||||
@ -40,15 +33,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
cookingchanneltv|
|
cookingchanneltv|
|
||||||
motortrend
|
motortrend
|
||||||
)
|
)
|
||||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5a2d9b4d6b66d17a5026e1fd',
|
'id': '5a2f35ce6b66d17a5026e29e',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dave Foley',
|
'title': 'Riding with Matthew Perry',
|
||||||
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
|
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
||||||
'duration': 608,
|
'duration': 84,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -56,20 +49,20 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# using `show_slug` is important to get the correct video data
|
||||||
|
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_GEO_COUNTRIES = ['US']
|
_GEO_COUNTRIES = ['US']
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
react_data = self._parse_json(self._search_regex(
|
|
||||||
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
|
|
||||||
webpage, 'react data'), display_id)
|
|
||||||
content_blocks = react_data['layout'][path]['contentBlocks']
|
|
||||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
|
||||||
video_id = video['id']
|
|
||||||
|
|
||||||
access_token = None
|
access_token = None
|
||||||
cookies = self._get_cookies(url)
|
cookies = self._get_cookies(url)
|
||||||
@ -79,27 +72,36 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
if auth_storage_cookie and auth_storage_cookie.value:
|
if auth_storage_cookie and auth_storage_cookie.value:
|
||||||
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||||
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||||
video_id, fatal=False) or {}
|
display_id, fatal=False) or {}
|
||||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||||
|
|
||||||
if not access_token:
|
if not access_token:
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
'https://%s.com/anonymous' % site, display_id, query={
|
'https://%s.com/anonymous' % site, display_id,
|
||||||
|
'Downloading token JSON metadata', query={
|
||||||
'authRel': 'authorization',
|
'authRel': 'authorization',
|
||||||
'client_id': try_get(
|
'client_id': '3020a40c2356a645b4b4',
|
||||||
react_data, lambda x: x['application']['apiClientId'],
|
|
||||||
compat_str) or '3020a40c2356a645b4b4',
|
|
||||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
'redirectUri': 'https://www.discovery.com/',
|
||||||
})['access_token']
|
})['access_token']
|
||||||
|
|
||||||
try:
|
headers = self.geo_verification_headers()
|
||||||
headers = self.geo_verification_headers()
|
headers['Authorization'] = 'Bearer ' + access_token
|
||||||
headers['Authorization'] = 'Bearer ' + access_token
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
video = self._download_json(
|
||||||
|
self._API_BASE_URL + 'content/videos',
|
||||||
|
display_id, 'Downloading content JSON metadata',
|
||||||
|
headers=headers, query={
|
||||||
|
'embed': 'show.name',
|
||||||
|
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
|
||||||
|
'slug': display_id,
|
||||||
|
'show_slug': show_slug,
|
||||||
|
})[0]
|
||||||
|
video_id = video['id']
|
||||||
stream = self._download_json(
|
stream = self._download_json(
|
||||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||||
display_id, headers=headers)
|
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||||
e_description = self._parse_json(
|
e_description = self._parse_json(
|
||||||
|
@ -3,63 +3,38 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .brightcove import BrightcoveLegacyIE
|
|
||||||
from .dplay import DPlayIE
|
from .dplay import DPlayIE
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import smuggle_url
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(DPlayIE):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
||||||
(?:
|
|
||||||
.*\#(?P<id>\d+)|
|
|
||||||
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
|
||||||
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
|
||||||
)'''
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3235167922001',
|
'id': '78867',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Breaking Amish: Die Welt da draußen',
|
'title': 'Die Welt da draußen',
|
||||||
'description': (
|
'description': 'md5:61033c12b73286e409d99a41742ef608',
|
||||||
'Vier Amische und eine Mennonitin wagen in New York'
|
'timestamp': 1554069600,
|
||||||
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
|
'upload_date': '20190331',
|
||||||
' ihrem spannenden Weg.'),
|
},
|
||||||
'timestamp': 1396598084,
|
'params': {
|
||||||
'upload_date': '20140404',
|
'format': 'bestvideo',
|
||||||
'uploader_id': '1659832546',
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
|
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.discovery.de/#5332316765001',
|
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
|
||||||
alternate_id = mobj.group('alternate_id')
|
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||||
if alternate_id:
|
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||||
self._initialize_geo_bypass({
|
return self._get_disco_api_info(
|
||||||
'countries': ['DE'],
|
url, '%s/%s' % (programme, alternate_id),
|
||||||
})
|
'sonic-eu1-prod.disco-api.com', realm, country)
|
||||||
return self._get_disco_api_info(
|
|
||||||
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
|
||||||
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
|
||||||
brightcove_id = mobj.group('id')
|
|
||||||
if not brightcove_id:
|
|
||||||
title = mobj.group('title')
|
|
||||||
webpage = self._download_webpage(url, title)
|
|
||||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
|
||||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
|
||||||
brightcove_legacy_url).query)['@videoPlayer'][0]
|
|
||||||
return self.url_result(smuggle_url(
|
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
|
|
||||||
'BrightcoveNew', brightcove_id)
|
|
||||||
|
97
youtube_dl/extractor/dlive.py
Normal file
97
youtube_dl/extractor/dlive.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
    ExtractorError,
    int_or_none,
)
|
||||||
|
|
||||||
|
|
||||||
|
class DLiveVODIE(InfoExtractor):
    """Extract a recorded DLive broadcast from a ``dlive.tv/p/<uploader>+<id>`` URL.

    Metadata is requested from the ``graphigo.prd.dlive.tv`` GraphQL endpoint
    and the formats are read from the HLS playlist named by ``playbackUrl``.
    """

    IE_NAME = 'dlive:vod'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
        'info_dict': {
            'id': '3mTzOl4WR',
            'ext': 'mp4',
            'title': 'Minecraft with james charles epic',
            'upload_date': '20190701',
            'timestamp': 1562011015,
            'uploader_id': 'pdp',
        }
    }, {
        'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the broadcast addressed by *url*.

        Raises:
            ExtractorError: (expected) when the API response contains no
                ``pastBroadcast`` object, rather than failing with a
                TypeError on the first subscript.
        """
        uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
        # The permlink is the "<uploader>+<id>" pair taken from the URL.
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', vod_id,
            data=json.dumps({'query': '''query {
pastBroadcast(permlink:"%s+%s") {
content
createdAt
length
playbackUrl
title
thumbnailUrl
viewCount
}
}''' % (uploader_id, vod_id)}).encode())

        # GraphQL may answer with a null ``data`` or a null ``pastBroadcast``
        # (presumably for unknown/removed broadcasts - confirm against the
        # API); report that as an expected error instead of crashing.
        broadcast = (response.get('data') or {}).get('pastBroadcast')
        if not broadcast:
            raise ExtractorError(
                'Unable to find broadcast %s+%s' % (uploader_id, vod_id),
                expected=True)

        # title and playbackUrl are mandatory: a missing key is a real error.
        title = broadcast['title']
        formats = self._extract_m3u8_formats(
            broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        return {
            'id': vod_id,
            'title': title,
            'uploader_id': uploader_id,
            'formats': formats,
            'description': broadcast.get('content'),
            'thumbnail': broadcast.get('thumbnailUrl'),
            # createdAt is scaled down by 1000 (the test above expects a
            # timestamp in seconds).
            'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
            'view_count': int_or_none(broadcast.get('viewCount')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class DLiveStreamIE(InfoExtractor):
    """Extract the current live stream of a DLive channel (``dlive.tv/<name>``).

    The channel's display name is resolved to its username through the
    ``graphigo.prd.dlive.tv`` GraphQL endpoint; the username then addresses
    the live HLS playlist on ``live.prd.dlive.tv``.
    """

    IE_NAME = 'dlive:stream'
    # The negative lookahead keeps VOD URLs (dlive.tv/p/...) out of this extractor.
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'

    def _real_extract(self, url):
        """Return the info dict for the live stream addressed by *url*.

        Raises:
            ExtractorError: (expected) when the API returns no user for the
                display name, or the user has no ``livestream`` object,
                rather than failing with a TypeError on the first subscript.
        """
        display_name = self._match_id(url)
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', display_name,
            data=json.dumps({'query': '''query {
userByDisplayName(displayname:"%s") {
livestream {
content
createdAt
title
thumbnailUrl
watchingCount
}
username
}
}''' % display_name}).encode())

        user = (response.get('data') or {}).get('userByDisplayName')
        if not user:
            raise ExtractorError(
                'Unable to find user %s' % display_name, expected=True)

        # NOTE(review): ``livestream`` is presumably null while the channel
        # is offline - confirm against the API. Either way a missing object
        # cannot be extracted, so fail with a readable message.
        livestream = user.get('livestream')
        if not livestream:
            raise ExtractorError(
                '%s is not streaming at the moment' % display_name,
                expected=True)

        # title and username are mandatory: a missing key is a real error.
        title = livestream['title']
        username = user['username']
        formats = self._extract_m3u8_formats(
            'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
            display_name, 'mp4')
        self._sort_formats(formats)

        return {
            'id': display_name,
            'title': self._live_title(title),
            'uploader': display_name,
            'uploader_id': username,
            'formats': formats,
            'description': livestream.get('content'),
            'thumbnail': livestream.get('thumbnailUrl'),
            'is_live': True,
            # createdAt is scaled down by 1000, as in the VOD extractor.
            'timestamp': int_or_none(livestream.get('createdAt'), 1000),
            'view_count': int_or_none(livestream.get('watchingCount')),
        }
|
@ -1,74 +1,68 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_HTTPError
|
||||||
compat_HTTPError,
|
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
remove_end,
|
|
||||||
try_get,
|
|
||||||
unified_strdate,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
|
||||||
urljoin,
|
|
||||||
USER_AGENTS,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DPlayIE(InfoExtractor):
|
class DPlayIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
|
_VALID_URL = r'''(?x)https?://
|
||||||
|
(?P<domain>
|
||||||
|
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
|
||||||
|
(?P<subdomain_country>es|it)\.dplay\.com
|
||||||
|
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# non geo restricted, via secure api, unsigned download hls URL
|
# non geo restricted, via secure api, unsigned download hls URL
|
||||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3172',
|
'id': '13628',
|
||||||
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
|
'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Svensken lär sig njuta av livet',
|
'title': 'Svensken lär sig njuta av livet',
|
||||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||||
'duration': 2650,
|
'duration': 2649.856,
|
||||||
'timestamp': 1365454320,
|
'timestamp': 1365453720,
|
||||||
'upload_date': '20130408',
|
'upload_date': '20130408',
|
||||||
'creator': 'Kanal 5 (Home)',
|
'creator': 'Kanal 5',
|
||||||
'series': 'Nugammalt - 77 händelser som format Sverige',
|
'series': 'Nugammalt - 77 händelser som format Sverige',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'age_limit': 0,
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# geo restricted, via secure api, unsigned download hls URL
|
# geo restricted, via secure api, unsigned download hls URL
|
||||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '70816',
|
'id': '104465',
|
||||||
'display_id': 'mig-og-min-mor/season-6-episode-12',
|
'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Episode 12',
|
'title': 'Ted Bundy: Mind Of A Monster',
|
||||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
'description': 'md5:8b780f6f18de4dae631668b8a9637995',
|
||||||
'duration': 2563,
|
'duration': 5290.027,
|
||||||
'timestamp': 1429696800,
|
'timestamp': 1570694400,
|
||||||
'upload_date': '20150422',
|
'upload_date': '20191010',
|
||||||
'creator': 'Kanal 4 (Home)',
|
'creator': 'ID - Investigation Discovery',
|
||||||
'series': 'Mig og min mor',
|
'series': 'Ted Bundy: Mind Of A Monster',
|
||||||
'season_number': 6,
|
'season_number': 1,
|
||||||
'episode_number': 12,
|
'episode_number': 1,
|
||||||
'age_limit': 0,
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
# geo restricted, via direct unsigned hls URL
|
|
||||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
# disco-api
|
# disco-api
|
||||||
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||||
@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor):
|
|||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Available for Premium users',
|
||||||
}, {
|
}, {
|
||||||
|
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
|
||||||
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
|
'md5': '2b808ffb00fc47b884a172ca5d13053c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6918',
|
||||||
|
'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
|
||||||
|
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
|
'upload_date': '20160524',
|
||||||
|
'timestamp': 1464076800,
|
||||||
|
'series': 'Biografie imbarazzanti',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '21652',
|
||||||
|
'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Episodio 1',
|
||||||
|
'description': 'md5:b9dcff2071086e003737485210675f69',
|
||||||
|
'thumbnail': r're:^https?://.*\.png',
|
||||||
|
'upload_date': '20180709',
|
||||||
|
'timestamp': 1531173540,
|
||||||
|
'series': 'La fiebre del oro',
|
||||||
|
'season_number': 8,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
|
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_disco_api_info(self, url, display_id, disco_host, realm):
|
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||||
disco_base = 'https://' + disco_host
|
geo_countries = [country.upper()]
|
||||||
|
self._initialize_geo_bypass({
|
||||||
|
'countries': geo_countries,
|
||||||
|
})
|
||||||
|
disco_base = 'https://%s/' % disco_host
|
||||||
token = self._download_json(
|
token = self._download_json(
|
||||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
disco_base + 'token', display_id, 'Downloading token',
|
||||||
query={
|
query={
|
||||||
'realm': realm,
|
'realm': realm,
|
||||||
})['data']['attributes']['token']
|
})['data']['attributes']['token']
|
||||||
@ -110,17 +144,35 @@ class DPlayIE(InfoExtractor):
|
|||||||
'Authorization': 'Bearer ' + token,
|
'Authorization': 'Bearer ' + token,
|
||||||
}
|
}
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
disco_base + 'content/videos/' + display_id, display_id,
|
||||||
headers=headers, query={
|
headers=headers, query={
|
||||||
'include': 'show'
|
'fields[channel]': 'name',
|
||||||
|
'fields[image]': 'height,src,width',
|
||||||
|
'fields[show]': 'name',
|
||||||
|
'fields[tag]': 'name',
|
||||||
|
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||||
|
'include': 'images,primaryChannel,show,tags'
|
||||||
})
|
})
|
||||||
video_id = video['data']['id']
|
video_id = video['data']['id']
|
||||||
info = video['data']['attributes']
|
info = video['data']['attributes']
|
||||||
title = info['name']
|
title = info['name'].strip()
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in self._download_json(
|
try:
|
||||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
streaming = self._download_json(
|
||||||
display_id, headers=headers)['data']['attributes']['streaming'].items():
|
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||||
|
display_id, headers=headers)['data']['attributes']['streaming']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||||
|
error = info['errors'][0]
|
||||||
|
error_code = error.get('code')
|
||||||
|
if error_code == 'access.denied.geoblocked':
|
||||||
|
self.raise_geo_restricted(countries=geo_countries)
|
||||||
|
elif error_code == 'access.denied.missingpackage':
|
||||||
|
self.raise_login_required()
|
||||||
|
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||||
|
raise
|
||||||
|
for format_id, format_dict in streaming.items():
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
format_url = format_dict.get('url')
|
format_url = format_dict.get('url')
|
||||||
@ -142,235 +194,54 @@ class DPlayIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
series = None
|
creator = series = None
|
||||||
try:
|
tags = []
|
||||||
included = video.get('included')
|
thumbnails = []
|
||||||
if isinstance(included, list):
|
included = video.get('included') or []
|
||||||
show = next(e for e in included if e.get('type') == 'show')
|
if isinstance(included, list):
|
||||||
series = try_get(
|
for e in included:
|
||||||
show, lambda x: x['attributes']['name'], compat_str)
|
attributes = e.get('attributes')
|
||||||
except StopIteration:
|
if not attributes:
|
||||||
pass
|
continue
|
||||||
|
e_type = e.get('type')
|
||||||
|
if e_type == 'channel':
|
||||||
|
creator = attributes.get('name')
|
||||||
|
elif e_type == 'image':
|
||||||
|
src = attributes.get('src')
|
||||||
|
if src:
|
||||||
|
thumbnails.append({
|
||||||
|
'url': src,
|
||||||
|
'width': int_or_none(attributes.get('width')),
|
||||||
|
'height': int_or_none(attributes.get('height')),
|
||||||
|
})
|
||||||
|
if e_type == 'show':
|
||||||
|
series = attributes.get('name')
|
||||||
|
elif e_type == 'tag':
|
||||||
|
name = attributes.get('name')
|
||||||
|
if name:
|
||||||
|
tags.append(name)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'duration': float_or_none(
|
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||||
info.get('videoDuration'), scale=1000),
|
|
||||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': int_or_none(info.get('seasonNumber')),
|
'season_number': int_or_none(info.get('seasonNumber')),
|
||||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
'creator': creator,
|
||||||
|
'tags': tags,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('id')
|
display_id = mobj.group('id')
|
||||||
domain = mobj.group('domain')
|
domain = mobj.group('domain').lstrip('www.')
|
||||||
|
country = mobj.group('country') or mobj.group('subdomain_country')
|
||||||
self._initialize_geo_bypass({
|
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
|
||||||
'countries': [mobj.group('country').upper()],
|
return self._get_disco_api_info(
|
||||||
})
|
url, display_id, host, 'dplay' + country, country)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
|
|
||||||
|
|
||||||
if not video_id:
|
|
||||||
host = mobj.group('host')
|
|
||||||
return self._get_disco_api_info(
|
|
||||||
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
|
||||||
|
|
||||||
info = self._download_json(
|
|
||||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
|
||||||
video_id)['data'][0]
|
|
||||||
|
|
||||||
title = info['title']
|
|
||||||
|
|
||||||
PROTOCOLS = ('hls', 'hds')
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
def extract_formats(protocol, manifest_url):
|
|
||||||
if protocol == 'hls':
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
manifest_url, video_id, ext='mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
|
||||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
|
||||||
# ourselves. Also fragments' URLs are only served signed for
|
|
||||||
# Safari user agent.
|
|
||||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
|
||||||
for m3u8_format in m3u8_formats:
|
|
||||||
m3u8_format.update({
|
|
||||||
'url': update_url_query(m3u8_format['url'], query),
|
|
||||||
'http_headers': {
|
|
||||||
'User-Agent': USER_AGENTS['Safari'],
|
|
||||||
},
|
|
||||||
})
|
|
||||||
formats.extend(m3u8_formats)
|
|
||||||
elif protocol == 'hds':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
|
||||||
video_id, f4m_id=protocol, fatal=False))
|
|
||||||
|
|
||||||
domain_tld = domain.split('.')[-1]
|
|
||||||
if domain_tld in ('se', 'dk', 'no'):
|
|
||||||
for protocol in PROTOCOLS:
|
|
||||||
# Providing dsc-geo allows to bypass geo restriction in some cases
|
|
||||||
self._set_cookie(
|
|
||||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
|
||||||
json.dumps({
|
|
||||||
'countryCode': domain_tld.upper(),
|
|
||||||
'expiry': (time.time() + 20 * 60) * 1000,
|
|
||||||
}))
|
|
||||||
stream = self._download_json(
|
|
||||||
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
|
|
||||||
% (domain_tld, video_id, protocol), video_id,
|
|
||||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
|
||||||
if stream and stream.get(protocol):
|
|
||||||
extract_formats(protocol, stream[protocol])
|
|
||||||
|
|
||||||
# The last resort is to try direct unsigned hls/hds URLs from info dictionary.
|
|
||||||
# Sometimes this does work even when secure API with dsc-geo has failed (e.g.
|
|
||||||
# http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
|
|
||||||
if not formats:
|
|
||||||
for protocol in PROTOCOLS:
|
|
||||||
if info.get(protocol):
|
|
||||||
extract_formats(protocol, info[protocol])
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for lang in ('se', 'sv', 'da', 'nl', 'no'):
|
|
||||||
for format_id in ('web_vtt', 'vtt', 'srt'):
|
|
||||||
subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
|
|
||||||
if subtitle_url:
|
|
||||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': info.get('video_metadata_longDescription'),
|
|
||||||
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
|
|
||||||
'timestamp': int_or_none(info.get('video_publish_date')),
|
|
||||||
'creator': info.get('video_metadata_homeChannel'),
|
|
||||||
'series': info.get('video_metadata_show'),
|
|
||||||
'season_number': int_or_none(info.get('season')),
|
|
||||||
'episode_number': int_or_none(info.get('episode')),
|
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DPlayItIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
|
|
||||||
_GEO_COUNTRIES = ['IT']
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
|
|
||||||
'md5': '2b808ffb00fc47b884a172ca5d13053c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '6918',
|
|
||||||
'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
|
|
||||||
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
|
||||||
'upload_date': '20160524',
|
|
||||||
'series': 'Biografie imbarazzanti',
|
|
||||||
'season_number': 1,
|
|
||||||
'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
|
|
||||||
'episode_number': 1,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
|
||||||
|
|
||||||
video_id = None
|
|
||||||
|
|
||||||
info = self._search_regex(
|
|
||||||
r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
|
|
||||||
webpage, 'playback JSON', default=None)
|
|
||||||
if info:
|
|
||||||
for _ in range(2):
|
|
||||||
info = self._parse_json(info, display_id, fatal=False)
|
|
||||||
if not info:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
video_id = try_get(info, lambda x: x['data']['id'])
|
|
||||||
|
|
||||||
if not info:
|
|
||||||
info_url = self._search_regex(
|
|
||||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
|
||||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
|
||||||
webpage, 'info url', group='url')
|
|
||||||
|
|
||||||
info_url = urljoin(url, info_url)
|
|
||||||
video_id = info_url.rpartition('/')[-1]
|
|
||||||
|
|
||||||
try:
|
|
||||||
info = self._download_json(
|
|
||||||
info_url, display_id, headers={
|
|
||||||
'Authorization': 'Bearer %s' % self._get_cookies(url).get(
|
|
||||||
'dplayit_token').value,
|
|
||||||
'Referer': url,
|
|
||||||
})
|
|
||||||
if isinstance(info, compat_str):
|
|
||||||
info = self._parse_json(info, display_id)
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
|
||||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
|
||||||
error = info['errors'][0]
|
|
||||||
if error.get('code') == 'access.denied.geoblocked':
|
|
||||||
self.raise_geo_restricted(
|
|
||||||
msg=error.get('detail'), countries=self._GEO_COUNTRIES)
|
|
||||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
hls_url = info['data']['attributes']['streaming']['hls']['url']
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
series = self._html_search_regex(
|
|
||||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
|
||||||
webpage, 'series', fatal=False)
|
|
||||||
episode = self._search_regex(
|
|
||||||
r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
|
|
||||||
webpage, 'episode', fatal=False)
|
|
||||||
|
|
||||||
mobj = re.search(
|
|
||||||
r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
|
|
||||||
webpage)
|
|
||||||
if mobj:
|
|
||||||
season_number = int(mobj.group('season_number'))
|
|
||||||
episode_number = int(mobj.group('episode_number'))
|
|
||||||
upload_date = unified_strdate(mobj.group('upload_date'))
|
|
||||||
else:
|
|
||||||
season_number = episode_number = upload_date = None
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': compat_str(video_id or display_id),
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'series': series,
|
|
||||||
'season_number': season_number,
|
|
||||||
'episode': episode,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -24,7 +25,14 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DRTVIE(InfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
||||||
|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
|
||||||
|
)
|
||||||
|
(?P<id>[\da-z_-]+)
|
||||||
|
'''
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_GEO_COUNTRIES = ['DK']
|
_GEO_COUNTRIES = ['DK']
|
||||||
IE_NAME = 'drtv'
|
IE_NAME = 'drtv'
|
||||||
@ -83,6 +91,26 @@ class DRTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '00951930010',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Bonderøven (1:8)',
|
||||||
|
'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
|
||||||
|
'timestamp': 1546542000,
|
||||||
|
'upload_date': '20190103',
|
||||||
|
'duration': 2576.6,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -100,13 +128,32 @@ class DRTVIE(InfoExtractor):
|
|||||||
webpage, 'video id', default=None)
|
webpage, 'video id', default=None)
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = compat_urllib_parse_unquote(self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||||
webpage, 'urn'))
|
webpage, 'urn', default=None)
|
||||||
|
if video_id:
|
||||||
|
video_id = compat_urllib_parse_unquote(video_id)
|
||||||
|
|
||||||
|
_PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
|
||||||
|
query = {'expanded': 'true'}
|
||||||
|
|
||||||
|
if video_id:
|
||||||
|
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||||
|
else:
|
||||||
|
programcard_url = _PROGRAMCARD_BASE
|
||||||
|
page = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
|
||||||
|
'data'), '1')['cache']['page']
|
||||||
|
page = page[list(page.keys())[0]]
|
||||||
|
item = try_get(
|
||||||
|
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
|
||||||
|
dict)
|
||||||
|
video_id = item['customId'].split(':')[-1]
|
||||||
|
query['productionnumber'] = video_id
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
|
programcard_url, video_id, 'Downloading video JSON', query=query)
|
||||||
video_id, 'Downloading video JSON', query={'expanded': 'true'})
|
|
||||||
|
|
||||||
title = str_or_none(data.get('Title')) or re.sub(
|
title = str_or_none(data.get('Title')) or re.sub(
|
||||||
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||||
|
@ -1,20 +1,17 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_b64decode
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
sanitized_Request,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DumpertIE(InfoExtractor):
|
class DumpertIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6646981/951bc60f',
|
'id': '6646981/951bc60f',
|
||||||
@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
|
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url).replace('_', '/')
|
||||||
video_id = mobj.group('id')
|
item = self._download_json(
|
||||||
protocol = mobj.group('protocol')
|
'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
|
||||||
|
video_id)['items'][0]
|
||||||
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
|
title = item['title']
|
||||||
req = sanitized_Request(url)
|
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
|
||||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
|
||||||
webpage = self._download_webpage(req, video_id)
|
|
||||||
|
|
||||||
files_base64 = self._search_regex(
|
|
||||||
r'data-files="([^"]+)"', webpage, 'data files')
|
|
||||||
|
|
||||||
files = self._parse_json(
|
|
||||||
compat_b64decode(files_base64).decode('utf-8'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||||
|
formats = []
|
||||||
formats = [{
|
for variant in media.get('variants', []):
|
||||||
'url': video_url,
|
uri = variant.get('uri')
|
||||||
'format_id': format_id,
|
if not uri:
|
||||||
'quality': quality(format_id),
|
continue
|
||||||
} for format_id, video_url in files.items() if format_id != 'still']
|
version = variant.get('version')
|
||||||
|
formats.append({
|
||||||
|
'url': uri,
|
||||||
|
'format_id': version,
|
||||||
|
'quality': quality(version),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_meta(
|
thumbnails = []
|
||||||
'title', webpage) or self._og_search_title(webpage)
|
stills = item.get('stills') or {}
|
||||||
description = self._html_search_meta(
|
for t in ('thumb', 'still'):
|
||||||
'description', webpage) or self._og_search_description(webpage)
|
for s in ('', '-medium', '-large'):
|
||||||
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
still_id = t + s
|
||||||
|
still_url = stills.get(still_id)
|
||||||
|
if not still_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'id': still_id,
|
||||||
|
'url': still_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
stats = item.get('stats') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': item.get('description'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats
|
'formats': formats,
|
||||||
|
'duration': int_or_none(media.get('duration')),
|
||||||
|
'like_count': int_or_none(stats.get('kudos_total')),
|
||||||
|
'view_count': int_or_none(stats.get('views_total')),
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -18,7 +19,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EinthusanIE(InfoExtractor):
|
class EinthusanIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||||
@ -32,6 +33,12 @@ class EinthusanIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||||
@ -41,7 +48,9 @@ class EinthusanIE(InfoExtractor):
|
|||||||
)).decode('utf-8'), video_id)
|
)).decode('utf-8'), video_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -53,7 +62,7 @@ class EinthusanIE(InfoExtractor):
|
|||||||
page_id = self._html_search_regex(
|
page_id = self._html_search_regex(
|
||||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||||
'xJson': json.dumps({
|
'xJson': json.dumps({
|
||||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||||
|
|
||||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
video_id = self._match_id(urlh.geturl())
|
||||||
|
|
||||||
hash = self._search_regex(
|
hash = self._search_regex(
|
||||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||||
|
@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
|
'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '21846851',
|
'id': '56032156',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
|
'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
|
||||||
'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
|
'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
|
||||||
'timestamp': 1513960621,
|
|
||||||
'upload_date': '20171222',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
embed_url = self._search_regex(
|
||||||
r'data-video-id=["\'](?P<id>\d+)',
|
r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
|
||||||
webpage, 'video id', group='id')
|
webpage, 'embed url')
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(embed_url, 'AbcNewsVideo')
|
||||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
class ExpressenIE(InfoExtractor):
|
class ExpressenIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?expressen\.se/
|
(?:www\.)?(?:expressen|di)\.se/
|
||||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||||
tv/(?:[^/]+/)*
|
tv/(?:[^/]+/)*
|
||||||
(?P<id>[^/?#&]+)
|
(?P<id>[^/?#&]+)
|
||||||
@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [
|
return [
|
||||||
mobj.group('url') for mobj in re.finditer(
|
mobj.group('url') for mobj in re.finditer(
|
||||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -18,10 +18,10 @@ from .acast import (
|
|||||||
ACastIE,
|
ACastIE,
|
||||||
ACastChannelIE,
|
ACastChannelIE,
|
||||||
)
|
)
|
||||||
from .addanime import AddAnimeIE
|
|
||||||
from .adn import ADNIE
|
from .adn import ADNIE
|
||||||
from .adobeconnect import AdobeConnectIE
|
from .adobeconnect import AdobeConnectIE
|
||||||
from .adobetv import (
|
from .adobetv import (
|
||||||
|
AdobeTVEmbedIE,
|
||||||
AdobeTVIE,
|
AdobeTVIE,
|
||||||
AdobeTVShowIE,
|
AdobeTVShowIE,
|
||||||
AdobeTVChannelIE,
|
AdobeTVChannelIE,
|
||||||
@ -80,7 +80,6 @@ from .awaan import (
|
|||||||
)
|
)
|
||||||
from .azmedien import AZMedienIE
|
from .azmedien import AZMedienIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
from .bbc import (
|
from .bbc import (
|
||||||
BBCCoUkIE,
|
BBCCoUkIE,
|
||||||
@ -104,6 +103,9 @@ from .bild import BildIE
|
|||||||
from .bilibili import (
|
from .bilibili import (
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
BiliBiliBangumiIE,
|
BiliBiliBangumiIE,
|
||||||
|
BilibiliAudioIE,
|
||||||
|
BilibiliAudioAlbumIE,
|
||||||
|
BiliBiliPlayerIE,
|
||||||
)
|
)
|
||||||
from .biobiochiletv import BioBioChileTVIE
|
from .biobiochiletv import BioBioChileTVIE
|
||||||
from .bitchute import (
|
from .bitchute import (
|
||||||
@ -222,13 +224,13 @@ from .comedycentral import (
|
|||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
ToshIE,
|
ToshIE,
|
||||||
)
|
)
|
||||||
from .comcarcoff import ComCarCoffIE
|
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
MmsIE,
|
MmsIE,
|
||||||
RtmpIE,
|
RtmpIE,
|
||||||
)
|
)
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
|
from .contv import CONtvIE
|
||||||
from .corus import CorusIE
|
from .corus import CorusIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
@ -252,10 +254,6 @@ from .dailymotion import (
|
|||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
)
|
)
|
||||||
from .daisuki import (
|
|
||||||
DaisukiMottoIE,
|
|
||||||
DaisukiMottoPlaylistIE,
|
|
||||||
)
|
|
||||||
from .daum import (
|
from .daum import (
|
||||||
DaumIE,
|
DaumIE,
|
||||||
DaumClipIE,
|
DaumClipIE,
|
||||||
@ -274,10 +272,7 @@ from .douyutv import (
|
|||||||
DouyuShowIE,
|
DouyuShowIE,
|
||||||
DouyuTVIE,
|
DouyuTVIE,
|
||||||
)
|
)
|
||||||
from .dplay import (
|
from .dplay import DPlayIE
|
||||||
DPlayIE,
|
|
||||||
DPlayItIE,
|
|
||||||
)
|
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
@ -356,7 +351,6 @@ from .firsttv import FirstTVIE
|
|||||||
from .fivemin import FiveMinIE
|
from .fivemin import FiveMinIE
|
||||||
from .fivetv import FiveTVIE
|
from .fivetv import FiveTVIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
from .flipagram import FlipagramIE
|
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
from .formula1 import Formula1IE
|
from .formula1 import Formula1IE
|
||||||
@ -367,7 +361,10 @@ from .fourtube import (
|
|||||||
FuxIE,
|
FuxIE,
|
||||||
)
|
)
|
||||||
from .fox import FOXIE
|
from .fox import FOXIE
|
||||||
from .fox9 import FOX9IE
|
from .fox9 import (
|
||||||
|
FOX9IE,
|
||||||
|
FOX9NewsIE,
|
||||||
|
)
|
||||||
from .foxgay import FoxgayIE
|
from .foxgay import FoxgayIE
|
||||||
from .foxnews import (
|
from .foxnews import (
|
||||||
FoxNewsIE,
|
FoxNewsIE,
|
||||||
@ -400,10 +397,6 @@ from .fusion import FusionIE
|
|||||||
from .fxnetworks import FXNetworksIE
|
from .fxnetworks import FXNetworksIE
|
||||||
from .gaia import GaiaIE
|
from .gaia import GaiaIE
|
||||||
from .gameinformer import GameInformerIE
|
from .gameinformer import GameInformerIE
|
||||||
from .gameone import (
|
|
||||||
GameOneIE,
|
|
||||||
GameOnePlaylistIE,
|
|
||||||
)
|
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
from .gaskrank import GaskrankIE
|
from .gaskrank import GaskrankIE
|
||||||
@ -419,7 +412,6 @@ from .globo import (
|
|||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
)
|
)
|
||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
from .go90 import Go90IE
|
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
@ -428,7 +420,6 @@ from .googlesearch import GoogleSearchIE
|
|||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
from .gputechconf import GPUTechConfIE
|
from .gputechconf import GPUTechConfIE
|
||||||
from .groupon import GrouponIE
|
from .groupon import GrouponIE
|
||||||
from .hark import HarkIE
|
|
||||||
from .hbo import HBOIE
|
from .hbo import HBOIE
|
||||||
from .hearthisat import HearThisAtIE
|
from .hearthisat import HearThisAtIE
|
||||||
from .heise import HeiseIE
|
from .heise import HeiseIE
|
||||||
@ -460,7 +451,6 @@ from .hungama import (
|
|||||||
HungamaSongIE,
|
HungamaSongIE,
|
||||||
)
|
)
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .iconosquare import IconosquareIE
|
|
||||||
from .ign import (
|
from .ign import (
|
||||||
IGNIE,
|
IGNIE,
|
||||||
OneUPIE,
|
OneUPIE,
|
||||||
@ -508,7 +498,6 @@ from .jeuxvideo import JeuxVideoIE
|
|||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
|
||||||
from .kakao import KakaoIE
|
from .kakao import KakaoIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
@ -519,10 +508,9 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .ketnet import KetnetIE
|
from .ketnet import KetnetIE
|
||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .keek import KeekIE
|
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .kontrtube import KontrTubeIE
|
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .kusi import KUSIIE
|
from .kusi import KUSIIE
|
||||||
@ -546,7 +534,6 @@ from .lcp import (
|
|||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
LcpIE,
|
LcpIE,
|
||||||
)
|
)
|
||||||
from .learnr import LearnrIE
|
|
||||||
from .lecture2go import Lecture2GoIE
|
from .lecture2go import Lecture2GoIE
|
||||||
from .lecturio import (
|
from .lecturio import (
|
||||||
LecturioIE,
|
LecturioIE,
|
||||||
@ -579,6 +566,7 @@ from .linkedin import (
|
|||||||
)
|
)
|
||||||
from .linuxacademy import LinuxAcademyIE
|
from .linuxacademy import LinuxAcademyIE
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
|
from .livejournal import LiveJournalIE
|
||||||
from .liveleak import (
|
from .liveleak import (
|
||||||
LiveLeakIE,
|
LiveLeakIE,
|
||||||
LiveLeakEmbedIE,
|
LiveLeakEmbedIE,
|
||||||
@ -597,13 +585,11 @@ from .lynda import (
|
|||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
)
|
)
|
||||||
from .m6 import M6IE
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
|
||||||
from .mailru import (
|
from .mailru import (
|
||||||
MailRuIE,
|
MailRuIE,
|
||||||
MailRuMusicIE,
|
MailRuMusicIE,
|
||||||
MailRuMusicSearchIE,
|
MailRuMusicSearchIE,
|
||||||
)
|
)
|
||||||
from .makertv import MakerTVIE
|
|
||||||
from .malltv import MallTVIE
|
from .malltv import MallTVIE
|
||||||
from .mangomolo import (
|
from .mangomolo import (
|
||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
@ -637,22 +623,23 @@ from .microsoftvirtualacademy import (
|
|||||||
MicrosoftVirtualAcademyIE,
|
MicrosoftVirtualAcademyIE,
|
||||||
MicrosoftVirtualAcademyCourseIE,
|
MicrosoftVirtualAcademyCourseIE,
|
||||||
)
|
)
|
||||||
from .minhateca import MinhatecaIE
|
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import (
|
from .mixcloud import (
|
||||||
MixcloudIE,
|
MixcloudIE,
|
||||||
MixcloudUserIE,
|
MixcloudUserIE,
|
||||||
MixcloudPlaylistIE,
|
MixcloudPlaylistIE,
|
||||||
MixcloudStreamIE,
|
|
||||||
)
|
)
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import (
|
||||||
|
MofosexIE,
|
||||||
|
MofosexEmbedIE,
|
||||||
|
)
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import (
|
from .motherless import (
|
||||||
@ -669,10 +656,9 @@ from .mtv import (
|
|||||||
MTVVideoIE,
|
MTVVideoIE,
|
||||||
MTVServicesEmbeddedIE,
|
MTVServicesEmbeddedIE,
|
||||||
MTVDEIE,
|
MTVDEIE,
|
||||||
MTV81IE,
|
MTVJapanIE,
|
||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
|
||||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||||
from .mychannels import MyChannelsIE
|
from .mychannels import MyChannelsIE
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
@ -812,16 +798,22 @@ from .ooyala import (
|
|||||||
OoyalaIE,
|
OoyalaIE,
|
||||||
OoyalaExternalIE,
|
OoyalaExternalIE,
|
||||||
)
|
)
|
||||||
from .openload import (
|
|
||||||
OpenloadIE,
|
|
||||||
VerystreamIE,
|
|
||||||
)
|
|
||||||
from .ora import OraTVIE
|
from .ora import OraTVIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
ORFFM4StoryIE,
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
|
ORFOE3IE,
|
||||||
|
ORFNOEIE,
|
||||||
|
ORFWIEIE,
|
||||||
|
ORFBGLIE,
|
||||||
|
ORFOOEIE,
|
||||||
|
ORFSTMIE,
|
||||||
|
ORFKTNIE,
|
||||||
|
ORFSBGIE,
|
||||||
|
ORFTIRIE,
|
||||||
|
ORFVBGIE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
from .outsidetv import OutsideTVIE
|
from .outsidetv import OutsideTVIE
|
||||||
@ -829,7 +821,6 @@ from .packtpub import (
|
|||||||
PacktPubIE,
|
PacktPubIE,
|
||||||
PacktPubCourseIE,
|
PacktPubCourseIE,
|
||||||
)
|
)
|
||||||
from .pandatv import PandaTVIE
|
|
||||||
from .pandoratv import PandoraTVIE
|
from .pandoratv import PandoraTVIE
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
@ -872,6 +863,7 @@ from .polskieradio import (
|
|||||||
PolskieRadioIE,
|
PolskieRadioIE,
|
||||||
PolskieRadioCategoryIE,
|
PolskieRadioCategoryIE,
|
||||||
)
|
)
|
||||||
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
@ -890,7 +882,6 @@ from .puhutv import (
|
|||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
)
|
)
|
||||||
from .presstv import PressTVIE
|
from .presstv import PressTVIE
|
||||||
from .promptfile import PromptFileIE
|
|
||||||
from .prosiebensat1 import ProSiebenSat1IE
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
from .puls4 import Puls4IE
|
from .puls4 import Puls4IE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
@ -927,7 +918,9 @@ from .rbmaradio import RBMARadioIE
|
|||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import (
|
from .redbulltv import (
|
||||||
RedBullTVIE,
|
RedBullTVIE,
|
||||||
|
RedBullEmbedIE,
|
||||||
RedBullTVRrnContentIE,
|
RedBullTVRrnContentIE,
|
||||||
|
RedBullIE,
|
||||||
)
|
)
|
||||||
from .reddit import (
|
from .reddit import (
|
||||||
RedditIE,
|
RedditIE,
|
||||||
@ -942,10 +935,6 @@ from .rentv import (
|
|||||||
from .restudy import RestudyIE
|
from .restudy import RestudyIE
|
||||||
from .reuters import ReutersIE
|
from .reuters import ReutersIE
|
||||||
from .reverbnation import ReverbNationIE
|
from .reverbnation import ReverbNationIE
|
||||||
from .revision3 import (
|
|
||||||
Revision3EmbedIE,
|
|
||||||
Revision3IE,
|
|
||||||
)
|
|
||||||
from .rice import RICEIE
|
from .rice import RICEIE
|
||||||
from .rmcdecouverte import RMCDecouverteIE
|
from .rmcdecouverte import RMCDecouverteIE
|
||||||
from .ro220 import Ro220IE
|
from .ro220 import Ro220IE
|
||||||
@ -967,7 +956,6 @@ from .rts import RTSIE
|
|||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
from .rtvs import RTVSIE
|
from .rtvs import RTVSIE
|
||||||
from .rudo import RudoIE
|
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
@ -990,11 +978,17 @@ from .savefrom import SaveFromIE
|
|||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .screencastomatic import ScreencastOMaticIE
|
from .screencastomatic import ScreencastOMaticIE
|
||||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
from .scrippsnetworks import (
|
||||||
|
ScrippsNetworksWatchIE,
|
||||||
|
ScrippsNetworksIE,
|
||||||
|
)
|
||||||
|
from .scte import (
|
||||||
|
SCTEIE,
|
||||||
|
SCTECourseIE,
|
||||||
|
)
|
||||||
from .seeker import SeekerIE
|
from .seeker import SeekerIE
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
from .servingsys import ServingSysIE
|
|
||||||
from .servus import ServusIE
|
from .servus import ServusIE
|
||||||
from .sevenplus import SevenPlusIE
|
from .sevenplus import SevenPlusIE
|
||||||
from .sexu import SexuIE
|
from .sexu import SexuIE
|
||||||
@ -1035,6 +1029,7 @@ from .snotr import SnotrIE
|
|||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .sonyliv import SonyLIVIE
|
from .sonyliv import SonyLIVIE
|
||||||
from .soundcloud import (
|
from .soundcloud import (
|
||||||
|
SoundcloudEmbedIE,
|
||||||
SoundcloudIE,
|
SoundcloudIE,
|
||||||
SoundcloudSetIE,
|
SoundcloudSetIE,
|
||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
@ -1078,7 +1073,6 @@ from .srmediathek import SRMediathekIE
|
|||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
from .streamable import StreamableIE
|
from .streamable import StreamableIE
|
||||||
from .streamango import StreamangoIE
|
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
from .streetvoice import StreetVoiceIE
|
from .streetvoice import StreetVoiceIE
|
||||||
@ -1127,12 +1121,14 @@ from .telegraaf import TelegraafIE
|
|||||||
from .telemb import TeleMBIE
|
from .telemb import TeleMBIE
|
||||||
from .telequebec import (
|
from .telequebec import (
|
||||||
TeleQuebecIE,
|
TeleQuebecIE,
|
||||||
|
TeleQuebecSquatIE,
|
||||||
TeleQuebecEmissionIE,
|
TeleQuebecEmissionIE,
|
||||||
TeleQuebecLiveIE,
|
TeleQuebecLiveIE,
|
||||||
)
|
)
|
||||||
from .teletask import TeleTaskIE
|
from .teletask import TeleTaskIE
|
||||||
from .telewebion import TelewebionIE
|
from .telewebion import TelewebionIE
|
||||||
from .tennistv import TennisTVIE
|
from .tennistv import TennisTVIE
|
||||||
|
from .tenplay import TenPlayIE
|
||||||
from .testurl import TestURLIE
|
from .testurl import TestURLIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .tfo import TFOIE
|
from .tfo import TFOIE
|
||||||
@ -1185,10 +1181,14 @@ from .tunein import (
|
|||||||
)
|
)
|
||||||
from .tunepk import TunePkIE
|
from .tunepk import TunePkIE
|
||||||
from .turbo import TurboIE
|
from .turbo import TurboIE
|
||||||
from .tutv import TutvIE
|
|
||||||
from .tv2 import (
|
from .tv2 import (
|
||||||
TV2IE,
|
TV2IE,
|
||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
|
KatsomoIE,
|
||||||
|
)
|
||||||
|
from .tv2dk import (
|
||||||
|
TV2DKIE,
|
||||||
|
TV2DKBornholmPlayIE,
|
||||||
)
|
)
|
||||||
from .tv2hu import TV2HuIE
|
from .tv2hu import TV2HuIE
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
@ -1231,14 +1231,11 @@ from .twentymin import TwentyMinutenIE
|
|||||||
from .twentythreevideo import TwentyThreeVideoIE
|
from .twentythreevideo import TwentyThreeVideoIE
|
||||||
from .twitcasting import TwitCastingIE
|
from .twitcasting import TwitCastingIE
|
||||||
from .twitch import (
|
from .twitch import (
|
||||||
TwitchVideoIE,
|
|
||||||
TwitchChapterIE,
|
|
||||||
TwitchVodIE,
|
TwitchVodIE,
|
||||||
TwitchProfileIE,
|
TwitchCollectionIE,
|
||||||
TwitchAllVideosIE,
|
TwitchVideosIE,
|
||||||
TwitchUploadsIE,
|
TwitchVideosClipsIE,
|
||||||
TwitchPastBroadcastsIE,
|
TwitchVideosCollectionsIE,
|
||||||
TwitchHighlightsIE,
|
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
TwitchClipsIE,
|
TwitchClipsIE,
|
||||||
)
|
)
|
||||||
@ -1246,15 +1243,23 @@ from .twitter import (
|
|||||||
TwitterCardIE,
|
TwitterCardIE,
|
||||||
TwitterIE,
|
TwitterIE,
|
||||||
TwitterAmplifyIE,
|
TwitterAmplifyIE,
|
||||||
|
TwitterBroadcastIE,
|
||||||
)
|
)
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
from .udn import UDNEmbedIE
|
from .udn import UDNEmbedIE
|
||||||
from .ufctv import UFCTVIE
|
from .ufctv import (
|
||||||
|
UFCTVIE,
|
||||||
|
UFCArabiaIE,
|
||||||
|
)
|
||||||
from .uktvplay import UKTVPlayIE
|
from .uktvplay import UKTVPlayIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
|
from .dlive import (
|
||||||
|
DLiveVODIE,
|
||||||
|
DLiveStreamIE,
|
||||||
|
)
|
||||||
from .umg import UMGDeIE
|
from .umg import UMGDeIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .unity import UnityIE
|
from .unity import UnityIE
|
||||||
@ -1276,7 +1281,6 @@ from .varzesh3 import Varzesh3IE
|
|||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veehd import VeeHDIE
|
from .veehd import VeeHDIE
|
||||||
from .veoh import VeohIE
|
from .veoh import VeohIE
|
||||||
from .vessel import VesselIE
|
|
||||||
from .vesti import VestiIE
|
from .vesti import VestiIE
|
||||||
from .vevo import (
|
from .vevo import (
|
||||||
VevoIE,
|
VevoIE,
|
||||||
@ -1303,7 +1307,6 @@ from .videomore import (
|
|||||||
VideomoreVideoIE,
|
VideomoreVideoIE,
|
||||||
VideomoreSeasonIE,
|
VideomoreSeasonIE,
|
||||||
)
|
)
|
||||||
from .videopremium import VideoPremiumIE
|
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .vidio import VidioIE
|
from .vidio import VidioIE
|
||||||
from .vidlii import VidLiiIE
|
from .vidlii import VidLiiIE
|
||||||
@ -1318,7 +1321,6 @@ from .viewlift import (
|
|||||||
ViewLiftIE,
|
ViewLiftIE,
|
||||||
ViewLiftEmbedIE,
|
ViewLiftEmbedIE,
|
||||||
)
|
)
|
||||||
from .viewster import ViewsterIE
|
|
||||||
from .viidea import ViideaIE
|
from .viidea import ViideaIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
@ -1407,7 +1409,6 @@ from .weibo import (
|
|||||||
WeiboMobileIE
|
WeiboMobileIE
|
||||||
)
|
)
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
from .wimp import WimpIE
|
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .wsj import (
|
from .wsj import (
|
||||||
@ -1421,6 +1422,7 @@ from .xfileshare import XFileShareIE
|
|||||||
from .xhamster import (
|
from .xhamster import (
|
||||||
XHamsterIE,
|
XHamsterIE,
|
||||||
XHamsterEmbedIE,
|
XHamsterEmbedIE,
|
||||||
|
XHamsterUserIE,
|
||||||
)
|
)
|
||||||
from .xiami import (
|
from .xiami import (
|
||||||
XiamiSongIE,
|
XiamiSongIE,
|
||||||
@ -1444,6 +1446,7 @@ from .yahoo import (
|
|||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
YahooGyaOPlayerIE,
|
YahooGyaOPlayerIE,
|
||||||
YahooGyaOIE,
|
YahooGyaOIE,
|
||||||
|
YahooJapanNewsIE,
|
||||||
)
|
)
|
||||||
from .yandexdisk import YandexDiskIE
|
from .yandexdisk import YandexDiskIE
|
||||||
from .yandexmusic import (
|
from .yandexmusic import (
|
||||||
|
@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
video_data = extract_from_jsmods_instances(server_js_data)
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data:
|
for f in video_data:
|
||||||
format_id = f['stream_type']
|
format_id = f['stream_type']
|
||||||
@ -402,9 +403,17 @@ class FacebookIE(InfoExtractor):
|
|||||||
if dash_manifest:
|
if dash_manifest:
|
||||||
formats.extend(self._parse_mpd_formats(
|
formats.extend(self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||||
|
subtitles_src = f[0].get('subtitles_src')
|
||||||
|
if subtitles_src:
|
||||||
|
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Cannot find video formats')
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
|
# Downloads with browser's User-Agent are rate limited. Working around
|
||||||
|
# with non-browser User-Agent.
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
@ -428,7 +437,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||||
|
|
||||||
view_count = parse_count(self._search_regex(
|
view_count = parse_count(self._search_regex(
|
||||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||||
@ -442,6 +451,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
return webpage, info_dict
|
return webpage, info_dict
|
||||||
@ -456,15 +466,18 @@ class FacebookIE(InfoExtractor):
|
|||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if '/posts/' in url:
|
if '/posts/' in url:
|
||||||
entries = [
|
video_id_json = self._search_regex(
|
||||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
||||||
for vid in self._parse_json(
|
default='')
|
||||||
self._search_regex(
|
if video_id_json:
|
||||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
entries = [
|
||||||
webpage, 'video ids', group='ids'),
|
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||||
video_id)]
|
for vid in self._parse_json(video_id_json, video_id)]
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
return self.playlist_result(entries, video_id)
|
# Single Video?
|
||||||
|
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
||||||
|
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||||
else:
|
else:
|
||||||
_, info_dict = self._extract_from_url(
|
_, info_dict = self._extract_from_url(
|
||||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||||
|
@ -9,7 +9,7 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
class FiveTVIE(InfoExtractor):
|
class FiveTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
http://
|
https?://
|
||||||
(?:www\.)?5-tv\.ru/
|
(?:www\.)?5-tv\.ru/
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+/)+(?P<id>\d+)|
|
(?:[^/]+/)+(?P<id>\d+)|
|
||||||
@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor):
|
|||||||
'duration': 180,
|
'duration': 180,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
|
||||||
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
|
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'glavnoe',
|
'id': 'glavnoe',
|
||||||
@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor):
|
|||||||
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
|
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
|
'skip': 'redirect to «Известия. Главное» project page',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
|
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
[r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
|
[r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
|
||||||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||||
webpage, 'video url')
|
webpage, 'video url')
|
||||||
|
|
||||||
|
@ -1,115 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
try_get,
|
|
||||||
unified_timestamp,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FlipagramIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
|
||||||
'md5': '888dcf08b7ea671381f00fab74692755',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'nyvTSJMKId',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
|
||||||
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
|
||||||
'duration': 35.571,
|
|
||||||
'timestamp': 1461244995,
|
|
||||||
'upload_date': '20160421',
|
|
||||||
'uploader': 'kitty juria',
|
|
||||||
'uploader_id': 'sjuria101',
|
|
||||||
'creator': 'kitty juria',
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'comments': list,
|
|
||||||
'formats': 'mincount:2',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_data = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
flipagram = video_data['flipagram']
|
|
||||||
video = flipagram['video']
|
|
||||||
|
|
||||||
json_ld = self._search_json_ld(webpage, video_id, default={})
|
|
||||||
title = json_ld.get('title') or flipagram['captionText']
|
|
||||||
description = json_ld.get('description') or flipagram.get('captionText')
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': video['url'],
|
|
||||||
'width': int_or_none(video.get('width')),
|
|
||||||
'height': int_or_none(video.get('height')),
|
|
||||||
'filesize': int_or_none(video_data.get('size')),
|
|
||||||
}]
|
|
||||||
|
|
||||||
preview_url = try_get(
|
|
||||||
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
|
||||||
if preview_url:
|
|
||||||
formats.append({
|
|
||||||
'url': preview_url,
|
|
||||||
'ext': 'm4a',
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
counts = flipagram.get('counts', {})
|
|
||||||
user = flipagram.get('user', {})
|
|
||||||
video_data = flipagram.get('video', {})
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': self._proto_relative_url(cover['url']),
|
|
||||||
'width': int_or_none(cover.get('width')),
|
|
||||||
'height': int_or_none(cover.get('height')),
|
|
||||||
'filesize': int_or_none(cover.get('size')),
|
|
||||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
|
||||||
|
|
||||||
# Note that this only retrieves comments that are initially loaded.
|
|
||||||
# For videos with large amounts of comments, most won't be retrieved.
|
|
||||||
comments = []
|
|
||||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
|
||||||
text = comment.get('comment')
|
|
||||||
if not text or not isinstance(text, list):
|
|
||||||
continue
|
|
||||||
comments.append({
|
|
||||||
'author': comment.get('user', {}).get('name'),
|
|
||||||
'author_id': comment.get('user', {}).get('username'),
|
|
||||||
'id': comment.get('id'),
|
|
||||||
'text': text[0],
|
|
||||||
'timestamp': unified_timestamp(comment.get('created')),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': float_or_none(flipagram.get('duration'), 1000),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
|
||||||
'uploader': user.get('name'),
|
|
||||||
'uploader_id': user.get('username'),
|
|
||||||
'creator': user.get('name'),
|
|
||||||
'view_count': int_or_none(counts.get('plays')),
|
|
||||||
'like_count': int_or_none(counts.get('likes')),
|
|
||||||
'repost_count': int_or_none(counts.get('reflips')),
|
|
||||||
'comment_count': int_or_none(counts.get('comments')),
|
|
||||||
'comments': comments,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
@ -1,13 +1,23 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .anvato import AnvatoIE
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class FOX9IE(AnvatoIE):
|
class FOX9IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
|
_VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.fox9.com/news/215123287-story',
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
|
||||||
|
'Anvato', video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class FOX9NewsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
|
||||||
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
|
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '314473',
|
'id': '314473',
|
||||||
@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
|
|||||||
'categories': ['News', 'Sports'],
|
'categories': ['News', 'Sports'],
|
||||||
'tags': ['news', 'video'],
|
'tags': ['news', 'video'],
|
||||||
},
|
},
|
||||||
}, {
|
}
|
||||||
'url': 'http://www.fox9.com/news/investigators/214070684-story',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
anvato_id = self._search_regex(
|
||||||
|
r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
|
||||||
video_id = self._parse_json(
|
return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
|
||||||
self._search_regex(
|
|
||||||
r"this\.videosJson\s*=\s*'(\[.+?\])';",
|
|
||||||
webpage, 'anvato playlist'),
|
|
||||||
video_id)[0]['video']
|
|
||||||
|
|
||||||
return self._get_anvato_videos(
|
|
||||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
|
|
||||||
video_id)
|
|
||||||
|
@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_data = extract_attributes(self._search_regex(
|
video_data = extract_attributes(self._search_regex(
|
||||||
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
|
r'''(?sx)
|
||||||
|
(?:
|
||||||
|
</h1>|
|
||||||
|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||||
|
).*?
|
||||||
|
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||||
|
''',
|
||||||
webpage, 'video data'))
|
webpage, 'video data'))
|
||||||
|
|
||||||
video_url = video_data['data-asset-source']
|
video_url = video_data['data-asset-source']
|
||||||
|
@ -1,134 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
xpath_with_ns,
|
|
||||||
parse_iso8601,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
NAMESPACE_MAP = {
|
|
||||||
'media': 'http://search.yahoo.com/mrss/',
|
|
||||||
}
|
|
||||||
|
|
||||||
# URL prefix to download the mp4 files directly instead of streaming via rtmp
|
|
||||||
# Credits go to XBox-Maniac
|
|
||||||
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
|
|
||||||
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
|
|
||||||
|
|
||||||
|
|
||||||
class GameOneIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://www.gameone.de/tv/288',
|
|
||||||
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '288',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Game One - Folge 288',
|
|
||||||
'duration': 1238,
|
|
||||||
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
|
|
||||||
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
|
|
||||||
'age_limit': 16,
|
|
||||||
'upload_date': '20140513',
|
|
||||||
'timestamp': 1399980122,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://gameone.de/tv/220',
|
|
||||||
'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '220',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20120918',
|
|
||||||
'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
|
|
||||||
'timestamp': 1347971451,
|
|
||||||
'title': 'Game One - Folge 220',
|
|
||||||
'duration': 896.62,
|
|
||||||
'age_limit': 16,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
og_video = self._og_search_video_url(webpage, secure=False)
|
|
||||||
description = self._html_search_meta('description', webpage)
|
|
||||||
age_limit = int(
|
|
||||||
self._search_regex(
|
|
||||||
r'age=(\d+)',
|
|
||||||
self._html_search_meta(
|
|
||||||
'age-de-meta-label',
|
|
||||||
webpage),
|
|
||||||
'age_limit',
|
|
||||||
'0'))
|
|
||||||
mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
|
|
||||||
|
|
||||||
mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
|
|
||||||
title = mrss.find('.//item/title').text
|
|
||||||
thumbnail = mrss.find('.//item/image').get('url')
|
|
||||||
timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
|
|
||||||
content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
|
|
||||||
content_url = content.get('url')
|
|
||||||
|
|
||||||
content = self._download_xml(
|
|
||||||
content_url,
|
|
||||||
video_id,
|
|
||||||
'Downloading media:content')
|
|
||||||
rendition_items = content.findall('.//rendition')
|
|
||||||
duration = float_or_none(rendition_items[0].get('duration'))
|
|
||||||
formats = [
|
|
||||||
{
|
|
||||||
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
|
|
||||||
'width': int_or_none(r.get('width')),
|
|
||||||
'height': int_or_none(r.get('height')),
|
|
||||||
'tbr': int_or_none(r.get('bitrate')),
|
|
||||||
}
|
|
||||||
for r in rendition_items
|
|
||||||
]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'description': description,
|
|
||||||
'age_limit': age_limit,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class GameOnePlaylistIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
|
|
||||||
IE_NAME = 'gameone:playlist'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.gameone.de/tv',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'GameOne',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 294,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
|
|
||||||
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
|
|
||||||
entries = [
|
|
||||||
self.url_result('http://www.gameone.de/tv/%d' %
|
|
||||||
video_id, 'GameOne')
|
|
||||||
for video_id in range(max_id, 0, -1)]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': 'GameOne',
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
@ -60,6 +60,9 @@ from .tnaflix import TNAFlixNetworkEmbedIE
|
|||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
|
from .mofosex import MofosexEmbedIE
|
||||||
|
from .spankwire import SpankwireIE
|
||||||
|
from .youporn import YouPornIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .dailymail import DailyMailIE
|
from .dailymail import DailyMailIE
|
||||||
@ -77,11 +80,10 @@ from .instagram import InstagramIE
|
|||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .vessel import VesselIE
|
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .soundcloud import SoundcloudIE
|
from .soundcloud import SoundcloudEmbedIE
|
||||||
from .tunein import TuneInBaseIE
|
from .tunein import TuneInBaseIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
@ -89,10 +91,6 @@ from .piksel import PikselIE
|
|||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .openload import (
|
|
||||||
OpenloadIE,
|
|
||||||
VerystreamIE,
|
|
||||||
)
|
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .rutube import RutubeIE
|
from .rutube import RutubeIE
|
||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
@ -119,6 +117,8 @@ from .foxnews import FoxNewsIE
|
|||||||
from .viqeo import ViqeoIE
|
from .viqeo import ViqeoIE
|
||||||
from .expressen import ExpressenIE
|
from .expressen import ExpressenIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
|
from .kinja import KinjaEmbedIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -1487,16 +1487,18 @@ class GenericIE(InfoExtractor):
|
|||||||
'timestamp': 1432570283,
|
'timestamp': 1432570283,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# OnionStudios embed
|
# Kinja embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2855',
|
'id': '106351',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
||||||
|
'description': 'Migrated from OnionStudios',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
'uploader': 'ClickHole',
|
'uploader': 'clickhole',
|
||||||
'uploader_id': 'clickhole',
|
'upload_date': '20150527',
|
||||||
|
'timestamp': 1432744860,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# SnagFilms embed
|
# SnagFilms embed
|
||||||
@ -1706,6 +1708,15 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# multiple kaltura embeds, nsfw
|
||||||
|
'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'kamila-avec-video-jaime-sadomie',
|
||||||
|
'title': "Kamila avec vídeo “J'aime sadomie”",
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# Non-standard Vimeo embed
|
# Non-standard Vimeo embed
|
||||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||||
@ -2075,6 +2086,22 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Squarespace video embed, 2019-08-28
|
||||||
|
'url': 'http://ootboxford.com',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Tc7b_JGdZfw',
|
||||||
|
'title': 'Out of the Blue, at Childish Things 10',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
|
||||||
|
'uploader_id': 'helendouglashouse',
|
||||||
|
'uploader': 'Helen & Douglas House',
|
||||||
|
'upload_date': '20140328',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# Zype embed
|
# Zype embed
|
||||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||||
@ -2083,6 +2110,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Smoky Barbecue Favorites',
|
'title': 'Smoky Barbecue Favorites',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
|
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||||
|
'upload_date': '20170909',
|
||||||
|
'timestamp': 1504915200,
|
||||||
},
|
},
|
||||||
'add_ie': [ZypeIE.ie_key()],
|
'add_ie': [ZypeIE.ie_key()],
|
||||||
'params': {
|
'params': {
|
||||||
@ -2226,7 +2256,7 @@ class GenericIE(InfoExtractor):
|
|||||||
default_search = 'fixup_error'
|
default_search = 'fixup_error'
|
||||||
|
|
||||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||||
if '/' in url:
|
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||||
return self.url_result('http://' + url)
|
return self.url_result('http://' + url)
|
||||||
elif default_search != 'fixup_error':
|
elif default_search != 'fixup_error':
|
||||||
@ -2269,7 +2299,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
if head_response is not False:
|
if head_response is not False:
|
||||||
# Check for redirect
|
# Check for redirect
|
||||||
new_url = compat_str(head_response.geturl())
|
new_url = head_response.geturl()
|
||||||
if url != new_url:
|
if url != new_url:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
if force_videoid:
|
if force_videoid:
|
||||||
@ -2369,12 +2399,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._parse_xspf(
|
self._parse_xspf(
|
||||||
doc, video_id, xspf_url=url,
|
doc, video_id, xspf_url=url,
|
||||||
xspf_base_url=compat_str(full_response.geturl())),
|
xspf_base_url=full_response.geturl()),
|
||||||
video_id)
|
video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
info_dict['formats'] = self._parse_mpd_formats(
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
doc,
|
doc,
|
||||||
mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
|
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||||
mpd_url=url)
|
mpd_url=url)
|
||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
return info_dict
|
return info_dict
|
||||||
@ -2395,6 +2425,12 @@ class GenericIE(InfoExtractor):
|
|||||||
# Unescaping the whole page allows to handle those cases in a generic way
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
webpage = compat_urllib_parse_unquote(webpage)
|
webpage = compat_urllib_parse_unquote(webpage)
|
||||||
|
|
||||||
|
# Unescape squarespace embeds to be detected by generic extractor,
|
||||||
|
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||||
|
webpage = re.sub(
|
||||||
|
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
|
||||||
|
lambda x: unescapeHTML(x.group(0)), webpage)
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
# have to take into account all the variations like
|
# have to take into account all the variations like
|
||||||
# Video Title - Site Name
|
# Video Title - Site Name
|
||||||
@ -2469,11 +2505,6 @@ class GenericIE(InfoExtractor):
|
|||||||
if tp_urls:
|
if tp_urls:
|
||||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||||
|
|
||||||
# Look for Vessel embeds
|
|
||||||
vessel_urls = VesselIE._extract_urls(webpage)
|
|
||||||
if vessel_urls:
|
|
||||||
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
|
|
||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||||
@ -2517,15 +2548,21 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Teachable embeds, must be before Wistia
|
||||||
|
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||||
|
if teachable_url:
|
||||||
|
return self.url_result(teachable_url)
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
wistia_url = WistiaIE._extract_url(webpage)
|
wistia_urls = WistiaIE._extract_urls(webpage)
|
||||||
if wistia_url:
|
if wistia_urls:
|
||||||
return {
|
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
|
||||||
'_type': 'url_transparent',
|
for entry in playlist['entries']:
|
||||||
'url': self._proto_relative_url(wistia_url),
|
entry.update({
|
||||||
'ie_key': WistiaIE.ie_key(),
|
'_type': 'url_transparent',
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
}
|
})
|
||||||
|
return playlist
|
||||||
|
|
||||||
# Look for SVT player
|
# Look for SVT player
|
||||||
svt_url = SVTIE._extract_url(webpage)
|
svt_url = SVTIE._extract_url(webpage)
|
||||||
@ -2611,9 +2648,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'VK')
|
return self.url_result(mobj.group('url'), 'VK')
|
||||||
|
|
||||||
# Look for embedded Odnoklassniki player
|
# Look for embedded Odnoklassniki player
|
||||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
|
odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
|
||||||
if mobj is not None:
|
if odnoklassniki_url:
|
||||||
return self.url_result(mobj.group('url'), 'Odnoklassniki')
|
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded ivi player
|
# Look for embedded ivi player
|
||||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||||
@ -2690,6 +2727,21 @@ class GenericIE(InfoExtractor):
|
|||||||
if tube8_urls:
|
if tube8_urls:
|
||||||
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded Mofosex player
|
||||||
|
mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
|
||||||
|
if mofosex_urls:
|
||||||
|
return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded Spankwire player
|
||||||
|
spankwire_urls = SpankwireIE._extract_urls(webpage)
|
||||||
|
if spankwire_urls:
|
||||||
|
return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
|
||||||
|
|
||||||
|
# Look for embedded YouPorn player
|
||||||
|
youporn_urls = YouPornIE._extract_urls(webpage)
|
||||||
|
if youporn_urls:
|
||||||
|
return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded Tvigle player
|
# Look for embedded Tvigle player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||||
@ -2732,9 +2784,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(myvi_url)
|
return self.url_result(myvi_url)
|
||||||
|
|
||||||
# Look for embedded soundcloud player
|
# Look for embedded soundcloud player
|
||||||
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
|
||||||
if soundcloud_urls:
|
if soundcloud_urls:
|
||||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
|
||||||
|
|
||||||
# Look for tunein player
|
# Look for tunein player
|
||||||
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
||||||
@ -2801,9 +2853,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
# Look for Kaltura embeds
|
# Look for Kaltura embeds
|
||||||
kaltura_url = KalturaIE._extract_url(webpage)
|
kaltura_urls = KalturaIE._extract_urls(webpage)
|
||||||
if kaltura_url:
|
if kaltura_urls:
|
||||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
return self.playlist_from_matches(
|
||||||
|
kaltura_urls, video_id, video_title,
|
||||||
|
getter=lambda x: smuggle_url(x, {'source_url': url}),
|
||||||
|
ie=KalturaIE.ie_key())
|
||||||
|
|
||||||
# Look for EaglePlatform embeds
|
# Look for EaglePlatform embeds
|
||||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||||
@ -2877,6 +2932,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if senate_isvp_url:
|
if senate_isvp_url:
|
||||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||||
|
|
||||||
|
# Look for Kinja embeds
|
||||||
|
kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
|
||||||
|
if kinja_embed_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
kinja_embed_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for OnionStudios embeds
|
# Look for OnionStudios embeds
|
||||||
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
||||||
if onionstudios_url:
|
if onionstudios_url:
|
||||||
@ -2938,7 +2999,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for VODPlatform embeds
|
# Look for VODPlatform embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
@ -2946,10 +3007,14 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for Mangomolo embeds
|
# Look for Mangomolo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
|
||||||
|
(?:
|
||||||
|
admin\.mangomolo\.com/analytics/index\.php/customers/embed|
|
||||||
|
player\.mangomolo\.com/v1
|
||||||
|
)/
|
||||||
(?:
|
(?:
|
||||||
video\?.*?\bid=(?P<video_id>\d+)|
|
video\?.*?\bid=(?P<video_id>\d+)|
|
||||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
(?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||||
).+?)\1''', webpage)
|
).+?)\1''', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
info = {
|
info = {
|
||||||
@ -3018,18 +3083,6 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
|
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
|
||||||
|
|
||||||
# Look for Openload embeds
|
|
||||||
openload_urls = OpenloadIE._extract_urls(webpage)
|
|
||||||
if openload_urls:
|
|
||||||
return self.playlist_from_matches(
|
|
||||||
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
|
|
||||||
|
|
||||||
# Look for Verystream embeds
|
|
||||||
verystream_urls = VerystreamIE._extract_urls(webpage)
|
|
||||||
if verystream_urls:
|
|
||||||
return self.playlist_from_matches(
|
|
||||||
verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
|
|
||||||
|
|
||||||
# Look for VideoPress embeds
|
# Look for VideoPress embeds
|
||||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||||
if videopress_urls:
|
if videopress_urls:
|
||||||
@ -3123,10 +3176,6 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||||
|
|
||||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
|
||||||
if teachable_url:
|
|
||||||
return self.url_result(teachable_url)
|
|
||||||
|
|
||||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||||
if indavideo_urls:
|
if indavideo_urls:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GfycatIE(InfoExtractor):
|
class GfycatIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
|
_VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -44,12 +44,21 @@ class GfycatIE(InfoExtractor):
|
|||||||
'categories': list,
|
'categories': list,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
|
||||||
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
|
'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -13,10 +13,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GiantBombIE(InfoExtractor):
|
class GiantBombIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||||
'md5': 'c8ea694254a59246a42831155dec57ac',
|
'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2300-9782',
|
'id': '2300-9782',
|
||||||
'display_id': 'quick-look-destiny-the-dark-below',
|
'display_id': 'quick-look-destiny-the-dark-below',
|
||||||
@ -26,7 +26,10 @@ class GiantBombIE(InfoExtractor):
|
|||||||
'duration': 2399,
|
'duration': 2399,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -96,21 +96,31 @@ class GloboIE(InfoExtractor):
|
|||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
||||||
video_id)['videos'][0]
|
video_id)['videos'][0]
|
||||||
|
if video.get('encrypted') is True:
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
for resource in video['resources']:
|
for resource in video['resources']:
|
||||||
resource_id = resource.get('_id')
|
resource_id = resource.get('_id')
|
||||||
resource_url = resource.get('url')
|
resource_url = resource.get('url')
|
||||||
if not resource_id or not resource_url:
|
resource_type = resource.get('type')
|
||||||
|
if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if resource_type == 'subtitle':
|
||||||
|
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||||
|
'url': resource_url,
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
security = self._download_json(
|
security = self._download_json(
|
||||||
'http://security.video.globo.com/videos/%s/hash' % video_id,
|
'http://security.video.globo.com/videos/%s/hash' % video_id,
|
||||||
video_id, 'Downloading security hash for %s' % resource_id, query={
|
video_id, 'Downloading security hash for %s' % resource_id, query={
|
||||||
'player': 'flash',
|
'player': 'desktop',
|
||||||
'version': '17.0.0.132',
|
'version': '5.19.1',
|
||||||
'resource_id': resource_id,
|
'resource_id': resource_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -123,18 +133,23 @@ class GloboIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
hash_code = security_hash[:2]
|
hash_code = security_hash[:2]
|
||||||
received_time = security_hash[2:12]
|
|
||||||
received_random = security_hash[12:22]
|
|
||||||
received_md5 = security_hash[22:]
|
|
||||||
|
|
||||||
sign_time = compat_str(int(received_time) + 86400)
|
|
||||||
padding = '%010d' % random.randint(1, 10000000000)
|
padding = '%010d' % random.randint(1, 10000000000)
|
||||||
|
if hash_code in ('04', '14'):
|
||||||
|
received_time = security_hash[3:13]
|
||||||
|
received_md5 = security_hash[24:]
|
||||||
|
hash_prefix = security_hash[:23]
|
||||||
|
elif hash_code in ('02', '12', '03', '13'):
|
||||||
|
received_time = security_hash[2:12]
|
||||||
|
received_md5 = security_hash[22:]
|
||||||
|
padding += '1'
|
||||||
|
hash_prefix = '05' + security_hash[:22]
|
||||||
|
|
||||||
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
|
padded_sign_time = compat_str(int(received_time) + 86400) + padding
|
||||||
|
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
|
||||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||||
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
|
signed_hash = hash_prefix + padded_sign_time + signed_md5
|
||||||
|
signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
|
||||||
|
|
||||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
|
||||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
@ -164,7 +179,8 @@ class GloboIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'formats': formats
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,8 +40,17 @@ class GoIE(AdobePassIE):
|
|||||||
'resource_id': 'Disney',
|
'resource_id': 'Disney',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
_VALID_URL = r'''(?x)
|
||||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:(?P<sub_domain>%s)\.)?go|
|
||||||
|
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||||
|
)\.com/
|
||||||
|
(?:
|
||||||
|
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||||
|
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
|
||||||
|
)
|
||||||
|
''' % '|'.join(list(_SITE_INFO.keys()))
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -54,6 +63,7 @@ class GoIE(AdobePassIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'This content is no longer available.',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://watchdisneyxd.go.com/doraemon',
|
'url': 'http://watchdisneyxd.go.com/doraemon',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -61,6 +71,34 @@ class GoIE(AdobePassIE):
|
|||||||
'id': 'SH55574025',
|
'id': 'SH55574025',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 51,
|
'playlist_mincount': 51,
|
||||||
|
}, {
|
||||||
|
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VDKA3609139',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'This Guilty Blood',
|
||||||
|
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VDKA13435179',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Bet',
|
||||||
|
'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -95,10 +133,13 @@ class GoIE(AdobePassIE):
|
|||||||
if not video_id or not site_info:
|
if not video_id or not site_info:
|
||||||
webpage = self._download_webpage(url, display_id or video_id)
|
webpage = self._download_webpage(url, display_id or video_id)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
(
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
default=video_id)
|
r'data-video-id=["\']*(VDKA\w+)',
|
||||||
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||||
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||||
|
), webpage, 'video id', default=video_id)
|
||||||
if not site_info:
|
if not site_info:
|
||||||
brand = self._search_regex(
|
brand = self._search_regex(
|
||||||
(r'data-brand=\s*["\']\s*(\d+)',
|
(r'data-brand=\s*["\']\s*(\d+)',
|
||||||
|
@ -1,149 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_HTTPError
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
|
||||||
parse_age_limit,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Go90IE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
|
|
||||||
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '84BUqjLpf9D',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
|
|
||||||
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
|
||||||
'timestamp': 1491868800,
|
|
||||||
'upload_date': '20170411',
|
|
||||||
'age_limit': 14,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.go90.com/embed/261MflWkD3N',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_GEO_BYPASS = False
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
try:
|
|
||||||
headers = self.geo_verification_headers()
|
|
||||||
headers.update({
|
|
||||||
'Content-Type': 'application/json; charset=utf-8',
|
|
||||||
})
|
|
||||||
video_data = self._download_json(
|
|
||||||
'https://www.go90.com/api/view/items/' + video_id, video_id,
|
|
||||||
headers=headers, data=b'{"client":"web","device_type":"pc"}')
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
|
||||||
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
|
|
||||||
if 'region unavailable' in message:
|
|
||||||
self.raise_geo_restricted(countries=['US'])
|
|
||||||
raise ExtractorError(message, expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
if video_data.get('requires_drm'):
|
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
|
||||||
main_video_asset = video_data['main_video_asset']
|
|
||||||
|
|
||||||
episode_number = int_or_none(video_data.get('episode_number'))
|
|
||||||
series = None
|
|
||||||
season = None
|
|
||||||
season_id = None
|
|
||||||
season_number = None
|
|
||||||
for metadata in video_data.get('__children', {}).get('Item', {}).values():
|
|
||||||
if metadata.get('type') == 'show':
|
|
||||||
series = metadata.get('title')
|
|
||||||
elif metadata.get('type') == 'season':
|
|
||||||
season = metadata.get('title')
|
|
||||||
season_id = metadata.get('id')
|
|
||||||
season_number = int_or_none(metadata.get('season_number'))
|
|
||||||
|
|
||||||
title = episode = video_data.get('title') or series
|
|
||||||
if series and series != title:
|
|
||||||
title = '%s - %s' % (series, title)
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
formats = []
|
|
||||||
subtitles = {}
|
|
||||||
for asset in video_data.get('assets'):
|
|
||||||
if asset.get('id') == main_video_asset:
|
|
||||||
for source in asset.get('sources', []):
|
|
||||||
source_location = source.get('location')
|
|
||||||
if not source_location:
|
|
||||||
continue
|
|
||||||
source_type = source.get('type')
|
|
||||||
if source_type == 'hls':
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
source_location, video_id, 'mp4',
|
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
|
||||||
for f in m3u8_formats:
|
|
||||||
mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
|
|
||||||
if mobj:
|
|
||||||
height, tbr = mobj.groups()
|
|
||||||
height = int_or_none(height)
|
|
||||||
f.update({
|
|
||||||
'height': f.get('height') or height,
|
|
||||||
'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
|
|
||||||
'tbr': f.get('tbr') or int_or_none(tbr),
|
|
||||||
})
|
|
||||||
formats.extend(m3u8_formats)
|
|
||||||
elif source_type == 'dash':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
source_location, video_id, mpd_id='dash', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'format_id': source.get('name'),
|
|
||||||
'url': source_location,
|
|
||||||
'width': int_or_none(source.get('width')),
|
|
||||||
'height': int_or_none(source.get('height')),
|
|
||||||
'tbr': int_or_none(source.get('bitrate')),
|
|
||||||
})
|
|
||||||
|
|
||||||
for caption in asset.get('caption_metadata', []):
|
|
||||||
caption_url = caption.get('source_url')
|
|
||||||
if not caption_url:
|
|
||||||
continue
|
|
||||||
subtitles.setdefault(caption.get('language', 'en'), []).append({
|
|
||||||
'url': caption_url,
|
|
||||||
'ext': determine_ext(caption_url, 'vtt'),
|
|
||||||
})
|
|
||||||
elif asset.get('type') == 'image':
|
|
||||||
asset_location = asset.get('location')
|
|
||||||
if not asset_location:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'url': asset_location,
|
|
||||||
'width': int_or_none(asset.get('width')),
|
|
||||||
'height': int_or_none(asset.get('height')),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'description': video_data.get('short_description'),
|
|
||||||
'like_count': int_or_none(video_data.get('like_count')),
|
|
||||||
'timestamp': parse_iso8601(video_data.get('released_at')),
|
|
||||||
'series': series,
|
|
||||||
'episode': episode,
|
|
||||||
'season': season,
|
|
||||||
'season_id': season_id,
|
|
||||||
'season_number': season_number,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'age_limit': parse_age_limit(video_data.get('rating')),
|
|
||||||
}
|
|
@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'export': 'download',
|
'export': 'download',
|
||||||
})
|
})
|
||||||
urlh = self._request_webpage(
|
|
||||||
source_url, video_id, note='Requesting source file',
|
def request_source_file(source_url, kind):
|
||||||
errnote='Unable to request source file', fatal=False)
|
return self._request_webpage(
|
||||||
|
source_url, video_id, note='Requesting %s file' % kind,
|
||||||
|
errnote='Unable to request %s file' % kind, fatal=False)
|
||||||
|
urlh = request_source_file(source_url, 'source')
|
||||||
if urlh:
|
if urlh:
|
||||||
def add_source_format(src_url):
|
def add_source_format(urlh):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': src_url,
|
# Use redirect URLs as download URLs in order to calculate
|
||||||
|
# correct cookies in _calc_cookies.
|
||||||
|
# Using original URLs may result in redirect loop due to
|
||||||
|
# google.com's cookies mistakenly used for googleusercontent.com
|
||||||
|
# redirect URLs (see #23919).
|
||||||
|
'url': urlh.geturl(),
|
||||||
'ext': determine_ext(title, 'mp4').lower(),
|
'ext': determine_ext(title, 'mp4').lower(),
|
||||||
'format_id': 'source',
|
'format_id': 'source',
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
})
|
})
|
||||||
if urlh.headers.get('Content-Disposition'):
|
if urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(source_url)
|
add_source_format(urlh)
|
||||||
else:
|
else:
|
||||||
confirmation_webpage = self._webpage_read_content(
|
confirmation_webpage = self._webpage_read_content(
|
||||||
urlh, url, video_id, note='Downloading confirmation page',
|
urlh, url, video_id, note='Downloading confirmation page',
|
||||||
@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||||
'confirmation code', fatal=False)
|
'confirmation code', fatal=False)
|
||||||
if confirm:
|
if confirm:
|
||||||
add_source_format(update_url_query(source_url, {
|
confirmed_source_url = update_url_query(source_url, {
|
||||||
'confirm': confirm,
|
'confirm': confirm,
|
||||||
}))
|
})
|
||||||
|
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||||
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
|
add_source_format(urlh)
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
reason = self._search_regex(
|
reason = self._search_regex(
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class HarkIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
|
|
||||||
'md5': '6783a58491b47b92c7c1af5a77d4cbee',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'mmbzyhkgny',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
|
|
||||||
'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
|
|
||||||
'duration': 11,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
data = self._download_json(
|
|
||||||
'http://www.hark.com/clips/%s.json' % video_id, video_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': data['url'],
|
|
||||||
'title': data['name'],
|
|
||||||
'description': data.get('description'),
|
|
||||||
'thumbnail': data.get('image_original'),
|
|
||||||
'duration': data.get('duration'),
|
|
||||||
}
|
|
@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
|
|||||||
webpage, default=None) or self._html_search_meta(
|
webpage, default=None) or self._html_search_meta(
|
||||||
'description', webpage)
|
'description', webpage)
|
||||||
|
|
||||||
kaltura_url = KalturaIE._extract_url(webpage)
|
def _make_kaltura_result(kaltura_url):
|
||||||
if kaltura_url:
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
||||||
@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kaltura_url = KalturaIE._extract_url(webpage)
|
||||||
|
if kaltura_url:
|
||||||
|
return _make_kaltura_result(kaltura_url)
|
||||||
|
|
||||||
|
kaltura_id = self._search_regex(
|
||||||
|
r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
|
||||||
|
default=None, group='id')
|
||||||
|
if kaltura_id:
|
||||||
|
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
|
||||||
|
|
||||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||||
if yt_urls:
|
if yt_urls:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
remove_end,
|
remove_end,
|
||||||
determine_ext,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
|
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
|
||||||
'md5': '1fee339c610d2049699ef2aa699439f1',
|
'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '149116',
|
'id': '149116',
|
||||||
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
|
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dixie is posing with naked ass very erotic',
|
'title': 'Dixie is posing with naked ass very erotic',
|
||||||
|
'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
|
||||||
|
'categories': list,
|
||||||
'thumbnail': r're:https?://.*\.jpg$',
|
'thumbnail': r're:https?://.*\.jpg$',
|
||||||
|
'duration': 240,
|
||||||
|
'timestamp': 1398762720,
|
||||||
|
'upload_date': '20140429',
|
||||||
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://hellporno.net/v/186271/',
|
'url': 'http://hellporno.net/v/186271/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor):
|
|||||||
title = remove_end(self._html_search_regex(
|
title = remove_end(self._html_search_regex(
|
||||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
|
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
|
||||||
|
|
||||||
flashvars = self._parse_json(self._search_regex(
|
info = self._parse_html5_media_entries(url, webpage, display_id)[0]
|
||||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
|
self._sort_formats(info['formats'])
|
||||||
display_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
video_id = flashvars.get('video_id')
|
video_id = self._search_regex(
|
||||||
thumbnail = flashvars.get('preview_url')
|
(r'chs_object\s*=\s*["\'](\d+)',
|
||||||
ext = determine_ext(flashvars.get('postfix'), 'mp4')
|
r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
|
||||||
|
default=display_id)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
categories = [
|
||||||
|
c.strip()
|
||||||
|
for c in self._html_search_meta(
|
||||||
|
'keywords', webpage, 'categories', default='').split(',')
|
||||||
|
if c.strip()]
|
||||||
|
duration = int_or_none(self._og_search_property(
|
||||||
|
'video:duration', webpage, fatal=False))
|
||||||
|
timestamp = unified_timestamp(self._og_search_property(
|
||||||
|
'video:release_date', webpage, fatal=False))
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
formats = []
|
return merge_dicts(info, {
|
||||||
for video_url_key in ['video_url', 'video_alt_url']:
|
|
||||||
video_url = flashvars.get(video_url_key)
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
video_text = flashvars.get('%s_text' % video_url_key)
|
|
||||||
fmt = {
|
|
||||||
'url': video_url,
|
|
||||||
'ext': ext,
|
|
||||||
'format_id': video_text,
|
|
||||||
}
|
|
||||||
m = re.search(r'^(?P<height>\d+)[pP]', video_text)
|
|
||||||
if m:
|
|
||||||
fmt['height'] = int(m.group('height'))
|
|
||||||
formats.append(fmt)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
categories = self._html_search_meta(
|
|
||||||
'keywords', webpage, 'categories', default='').split(',')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'description': description,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'view_count': view_count,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'formats': formats,
|
})
|
||||||
}
|
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
@ -117,6 +118,7 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
if video_data.get('drmProtected'):
|
if video_data.get('drmProtected'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
|
headers = {'Referer': url}
|
||||||
formats = []
|
formats = []
|
||||||
geo_restricted = False
|
geo_restricted = False
|
||||||
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
||||||
@ -126,6 +128,8 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
format_url = url_or_none(playback_set.get('playbackUrl'))
|
format_url = url_or_none(playback_set.get('playbackUrl'))
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
format_url = re.sub(
|
||||||
|
r'(?<=//staragvod)(\d)', r'web\1', format_url)
|
||||||
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
||||||
if tags and 'encryption:plain' not in tags:
|
if tags and 'encryption:plain' not in tags:
|
||||||
continue
|
continue
|
||||||
@ -133,10 +137,12 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
try:
|
try:
|
||||||
if 'package:hls' in tags or ext == 'm3u8':
|
if 'package:hls' in tags or ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4', m3u8_id='hls'))
|
format_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', headers=headers))
|
||||||
elif 'package:dash' in tags or ext == 'mpd':
|
elif 'package:dash' in tags or ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
format_url, video_id, mpd_id='dash'))
|
format_url, video_id, mpd_id='dash', headers=headers))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
# produce broken files
|
# produce broken files
|
||||||
pass
|
pass
|
||||||
@ -154,6 +160,9 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
self.raise_geo_restricted(countries=['IN'])
|
self.raise_geo_restricted(countries=['IN'])
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {}).update(headers)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1,85 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
get_element_by_id,
|
|
||||||
remove_end,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class IconosquareIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
|
||||||
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '522207370455279102_24101272',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
|
||||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
|
||||||
'timestamp': 1376471991,
|
|
||||||
'upload_date': '20130814',
|
|
||||||
'uploader': 'aguynamedpatrick',
|
|
||||||
'uploader_id': '24101272',
|
|
||||||
'comment_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
media = self._parse_json(
|
|
||||||
get_element_by_id('mediaJson', webpage),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': f['url'],
|
|
||||||
'format_id': format_id,
|
|
||||||
'width': int_or_none(f.get('width')),
|
|
||||||
'height': int_or_none(f.get('height'))
|
|
||||||
} for format_id, f in media['videos'].items()]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
|
|
||||||
|
|
||||||
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
|
|
||||||
description = media.get('caption', {}).get('text')
|
|
||||||
|
|
||||||
uploader = media.get('user', {}).get('username')
|
|
||||||
uploader_id = media.get('user', {}).get('id')
|
|
||||||
|
|
||||||
comment_count = int_or_none(media.get('comments', {}).get('count'))
|
|
||||||
like_count = int_or_none(media.get('likes', {}).get('count'))
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': t['url'],
|
|
||||||
'id': thumbnail_id,
|
|
||||||
'width': int_or_none(t.get('width')),
|
|
||||||
'height': int_or_none(t.get('height'))
|
|
||||||
} for thumbnail_id, t in media.get('images', {}).items()]
|
|
||||||
|
|
||||||
comments = [{
|
|
||||||
'id': comment.get('id'),
|
|
||||||
'text': comment['text'],
|
|
||||||
'timestamp': int_or_none(comment.get('created_time')),
|
|
||||||
'author': comment.get('from', {}).get('full_name'),
|
|
||||||
'author_id': comment.get('from', {}).get('username'),
|
|
||||||
} for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
'formats': formats,
|
|
||||||
'comments': comments,
|
|
||||||
}
|
|
@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,6 +10,7 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -15,15 +18,16 @@ from ..utils import (
|
|||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2524815897',
|
'id': '2524815897',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'No. 2 from Ice Age: Continental Drift (2012)',
|
'title': 'No. 2',
|
||||||
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
||||||
|
'duration': 152,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||||
@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
|
||||||
'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
|
data = self._download_json(
|
||||||
video_metadata = self._parse_json(self._search_regex(
|
'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
|
||||||
r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
|
query={
|
||||||
'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
|
'key': base64.b64encode(json.dumps({
|
||||||
title = self._html_search_meta(
|
'type': 'VIDEO_PLAYER',
|
||||||
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
'subType': 'FORCE_LEGACY',
|
||||||
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
|
'id': 'vi%s' % video_id,
|
||||||
|
}).encode()).decode(),
|
||||||
|
})[0]
|
||||||
|
|
||||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||||
formats = []
|
formats = []
|
||||||
for encoding in video_metadata.get('encodings', []):
|
for encoding in data['videoLegacyEncodings']:
|
||||||
if not encoding or not isinstance(encoding, dict):
|
if not encoding or not isinstance(encoding, dict):
|
||||||
continue
|
continue
|
||||||
video_url = url_or_none(encoding.get('videoUrl'))
|
video_url = url_or_none(encoding.get('url'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
ext = mimetype2ext(encoding.get(
|
ext = mimetype2ext(encoding.get(
|
||||||
@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
|
|||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
preference=1, m3u8_id='hls', fatal=False))
|
||||||
continue
|
continue
|
||||||
format_id = encoding.get('definition')
|
format_id = encoding.get('definition')
|
||||||
formats.append({
|
formats.append({
|
||||||
@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://www.imdb.com/video/vi' + video_id, video_id)
|
||||||
|
video_metadata = self._parse_json(self._search_regex(
|
||||||
|
r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
|
||||||
|
'video metadata'), video_id)
|
||||||
|
|
||||||
|
video_info = video_metadata.get('VIDEO_INFO')
|
||||||
|
if video_info and isinstance(video_info, dict):
|
||||||
|
info = try_get(
|
||||||
|
video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
|
||||||
|
else:
|
||||||
|
info = {}
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
||||||
|
r'<title>(.+?)</title>', webpage, 'title',
|
||||||
|
default=None) or info['videoTitle']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': info.get('videoSubTitle'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': video_metadata.get('description'),
|
'description': info.get('videoDescription'),
|
||||||
'thumbnail': video_metadata.get('slate', {}).get('url'),
|
'thumbnail': url_or_none(try_get(
|
||||||
'duration': parse_duration(video_metadata.get('duration')),
|
video_metadata, lambda x: x['videoSlate']['source'])),
|
||||||
|
'duration': parse_duration(info.get('videoRuntime')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
133
youtube_dl/extractor/imggaming.py
Normal file
133
youtube_dl/extractor/imggaming.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImgGamingBaseIE(InfoExtractor):
|
||||||
|
_API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
|
||||||
|
_API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
|
||||||
|
_HEADERS = None
|
||||||
|
_MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
|
||||||
|
_REALM = None
|
||||||
|
_VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._HEADERS = {
|
||||||
|
'Realm': 'dce.' + self._REALM,
|
||||||
|
'x-api-key': self._API_KEY,
|
||||||
|
}
|
||||||
|
|
||||||
|
email, password = self._get_login_info()
|
||||||
|
if email is None:
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
|
p_headers = self._HEADERS.copy()
|
||||||
|
p_headers['Content-Type'] = 'application/json'
|
||||||
|
self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
|
||||||
|
self._API_BASE + 'login',
|
||||||
|
None, 'Logging in', data=json.dumps({
|
||||||
|
'id': email,
|
||||||
|
'secret': password,
|
||||||
|
}).encode(), headers=p_headers)['authorisationToken']
|
||||||
|
|
||||||
|
def _call_api(self, path, media_id):
|
||||||
|
return self._download_json(
|
||||||
|
self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
|
||||||
|
|
||||||
|
def _extract_dve_api_url(self, media_id, media_type):
|
||||||
|
stream_path = 'stream'
|
||||||
|
if media_type == 'video':
|
||||||
|
stream_path += '/vod/'
|
||||||
|
else:
|
||||||
|
stream_path += '?eventId='
|
||||||
|
try:
|
||||||
|
return self._call_api(
|
||||||
|
stream_path, media_id)['playerUrlCallback']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
raise ExtractorError(
|
||||||
|
self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
|
if playlist_id:
|
||||||
|
if self._downloader.params.get('noplaylist'):
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
|
||||||
|
else:
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
|
||||||
|
media_type, media_id = 'playlist', playlist_id
|
||||||
|
|
||||||
|
if media_type == 'playlist':
|
||||||
|
playlist = self._call_api('vod/playlist/', media_id)
|
||||||
|
entries = []
|
||||||
|
for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
|
||||||
|
video_id = str_or_none(video.get('id'))
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'https://%s/video/%s' % (domain, video_id),
|
||||||
|
self.ie_key(), video_id))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, media_id, playlist.get('title'),
|
||||||
|
playlist.get('description'))
|
||||||
|
|
||||||
|
dve_api_url = self._extract_dve_api_url(media_id, media_type)
|
||||||
|
video_data = self._download_json(dve_api_url, media_id)
|
||||||
|
is_live = media_type == 'live'
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(self._call_api('event/', media_id)['title'])
|
||||||
|
else:
|
||||||
|
title = video_data['name']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for proto in ('hls', 'dash'):
|
||||||
|
media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
|
||||||
|
if not media_url:
|
||||||
|
continue
|
||||||
|
if proto == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
|
||||||
|
for f in m3u8_formats:
|
||||||
|
f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
|
||||||
|
formats.append(f)
|
||||||
|
else:
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
media_url, media_id, mpd_id='dash', fatal=False,
|
||||||
|
headers=self._MANIFEST_HEADERS))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for subtitle in video_data.get('subtitles', []):
|
||||||
|
subtitle_url = subtitle.get('url')
|
||||||
|
if not subtitle_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({
|
||||||
|
'url': subtitle_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': media_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': video_data.get('thumbnailUrl'),
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
|
'tags': video_data.get('tags'),
|
||||||
|
'is_live': is_live,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user