mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2025-01-24 13:37:54 +01:00
Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
6ef943d180
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.07**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.17**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.11.07
|
[debug] youtube-dl version 2018.12.17
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
12
.travis.yml
12
.travis.yml
@ -15,6 +15,18 @@ env:
|
|||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
- python: 3.7
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.7
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
|
- python: 3.8-dev
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.8-dev
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||||
fast_finish: true
|
fast_finish: true
|
||||||
|
@ -152,7 +152,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
@ -173,7 +173,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
|||||||
|
|
||||||
### Mandatory and optional metafields
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||||
|
|
||||||
- `id` (media identifier)
|
- `id` (media identifier)
|
||||||
- `title` (media title)
|
- `title` (media title)
|
||||||
@ -181,7 +181,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
|||||||
|
|
||||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
|
96
ChangeLog
96
ChangeLog
@ -1,3 +1,99 @@
|
|||||||
|
version 2018.12.17
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [ard:beta] Improve geo restricted videos extraction
|
||||||
|
* [ard:beta] Fix subtitles extraction
|
||||||
|
* [ard:beta] Improve extraction robustness
|
||||||
|
* [ard:beta] Relax URL regular expression (#18441)
|
||||||
|
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
|
||||||
|
* [iprima] Relax URL regular expression (#18515, #18540)
|
||||||
|
* [vrv] Fix initial state extraction (#18553)
|
||||||
|
* [youtube] Fix mark watched (#18546)
|
||||||
|
+ [safari] Add support for learning.oreilly.com (#18510)
|
||||||
|
* [youtube] Fix multifeed extraction (#18531)
|
||||||
|
* [lecturio] Improve subtitles extraction (#18488)
|
||||||
|
* [uol] Fix format URL extraction (#18480)
|
||||||
|
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.12.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Keep session cookies in cookie file between runs
|
||||||
|
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
|
||||||
|
+ [aenetworks] Add support for historyvault.com (#18460)
|
||||||
|
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
|
||||||
|
#17223, #18404)
|
||||||
|
* [iprima] Relax URL regular expression (#18453)
|
||||||
|
* [hotstar] Fix video data extraction (#18386)
|
||||||
|
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
|
||||||
|
* [xvideos] Switch to HTTPS (#18422, #18427)
|
||||||
|
+ [lecturio] Add support for lecturio.com (#18405)
|
||||||
|
+ [nrktv:series] Add support for extra materials
|
||||||
|
* [nrktv:season,series] Fix extraction (#17159, #17258)
|
||||||
|
* [nrktv] Relax URL regular expression (#18304, #18387)
|
||||||
|
* [yourporn] Fix extraction (#18424, #18425)
|
||||||
|
* [tbs] Fix info extraction (#18403)
|
||||||
|
+ [gamespot] Add support for review URLs
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.12.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Fix random_birthday to generate existing dates only (#18284)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [tiktok] Add support for tiktok.com (#18108, #18135)
|
||||||
|
* [pornhub] Use actual URL host for requests (#18359)
|
||||||
|
* [lynda] Fix authentication (#18158, #18217)
|
||||||
|
* [gfycat] Update API endpoint (#18333, #18343)
|
||||||
|
+ [hotstar] Add support for alternative app state layout (#18320)
|
||||||
|
* [azmedien] Fix extraction (#18334, #18336)
|
||||||
|
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
|
||||||
|
* [joj] Fix extraction (#18280, #18281)
|
||||||
|
+ [wistia] Add support for fast.wistia.com (#18287)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [setup.py] Add more relevant classifiers
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [mixcloud] Fallback to hardcoded decryption key (#18016)
|
||||||
|
* [nbc:news] Fix article extraction (#16194)
|
||||||
|
* [foxsports] Fix extraction (#17543)
|
||||||
|
* [loc] Relax regular expression and improve formats extraction
|
||||||
|
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
|
||||||
|
* [nzz] Relax kaltura regex (#18228)
|
||||||
|
* [sixplay] Fix formats extraction
|
||||||
|
* [bitchute] Improve title extraction
|
||||||
|
* [kaltura] Limit requested MediaEntry fields
|
||||||
|
+ [americastestkitchen] Add support for zype embeds (#18225)
|
||||||
|
+ [pornhub] Add pornhub.net alias
|
||||||
|
* [nova:embed] Fix extraction (#18222)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [wwe] Extract subtitles
|
||||||
|
+ [wwe] Add support for playlists (#14781)
|
||||||
|
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||||
|
* [vk] Detect geo restriction (#17767)
|
||||||
|
* [openload] Use original host during extraction (#18211)
|
||||||
|
* [atvat] Fix extraction (#18041)
|
||||||
|
+ [rte] Add support for new API endpoint (#18206)
|
||||||
|
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||||
|
* [picarto] Use API and add token support (#16518)
|
||||||
|
+ [zype] Add support for player.zype.com (#18143)
|
||||||
|
* [vivo] Fix extraction (#18139)
|
||||||
|
* [ruutu] Update API endpoint (#18138)
|
||||||
|
|
||||||
|
|
||||||
version 2018.11.07
|
version 2018.11.07
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -1024,7 +1024,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
@ -1045,7 +1045,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
|||||||
|
|
||||||
### Mandatory and optional metafields
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||||
|
|
||||||
- `id` (media identifier)
|
- `id` (media identifier)
|
||||||
- `title` (media title)
|
- `title` (media title)
|
||||||
@ -1053,7 +1053,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
|||||||
|
|
||||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@
|
|||||||
- **AdobeTVShow**
|
- **AdobeTVShow**
|
||||||
- **AdobeTVVideo**
|
- **AdobeTVVideo**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||||
- **afreecatv**: afreecatv.com
|
- **afreecatv**: afreecatv.com
|
||||||
- **AirMozilla**
|
- **AirMozilla**
|
||||||
- **AliExpressLive**
|
- **AliExpressLive**
|
||||||
@ -163,6 +163,8 @@
|
|||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
|
- **CiscoLiveSearch**
|
||||||
|
- **CiscoLiveSession**
|
||||||
- **CJSW**
|
- **CJSW**
|
||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **Clippit**
|
- **Clippit**
|
||||||
@ -374,7 +376,8 @@
|
|||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
- **Imgur**
|
- **Imgur**
|
||||||
- **ImgurAlbum**
|
- **imgur:album**
|
||||||
|
- **imgur:gallery**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **Inc**
|
- **Inc**
|
||||||
- **IndavideoEmbed**
|
- **IndavideoEmbed**
|
||||||
@ -433,6 +436,8 @@
|
|||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
- **Learnr**
|
- **Learnr**
|
||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
|
- **Lecturio**
|
||||||
|
- **LecturioCourse**
|
||||||
- **LEGO**
|
- **LEGO**
|
||||||
- **Lemonde**
|
- **Lemonde**
|
||||||
- **Lenta**
|
- **Lenta**
|
||||||
@ -851,6 +856,8 @@
|
|||||||
- **TastyTrade**
|
- **TastyTrade**
|
||||||
- **TBS**
|
- **TBS**
|
||||||
- **TDSLifeway**
|
- **TDSLifeway**
|
||||||
|
- **Teachable**
|
||||||
|
- **TeachableCourse**
|
||||||
- **teachertube**: teachertube.com videos
|
- **teachertube**: teachertube.com videos
|
||||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||||
- **TeachingChannel**
|
- **TeachingChannel**
|
||||||
@ -883,6 +890,8 @@
|
|||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **ThisOldHouse**
|
- **ThisOldHouse**
|
||||||
|
- **TikTok**
|
||||||
|
- **TikTokUser**
|
||||||
- **tinypic**: tinypic.com videos
|
- **tinypic**: tinypic.com videos
|
||||||
- **TMZ**
|
- **TMZ**
|
||||||
- **TMZArticle**
|
- **TMZArticle**
|
||||||
@ -957,8 +966,6 @@
|
|||||||
- **uol.com.br**
|
- **uol.com.br**
|
||||||
- **uplynk**
|
- **uplynk**
|
||||||
- **uplynk:preplay**
|
- **uplynk:preplay**
|
||||||
- **Upskill**
|
|
||||||
- **UpskillCourse**
|
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
- **URPlay**
|
- **URPlay**
|
||||||
- **USANetwork**
|
- **USANetwork**
|
||||||
@ -977,6 +984,7 @@
|
|||||||
- **VevoPlaylist**
|
- **VevoPlaylist**
|
||||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||||
- **vh1.com**
|
- **vh1.com**
|
||||||
|
- **vhx:embed**
|
||||||
- **Viafree**
|
- **Viafree**
|
||||||
- **vice**
|
- **vice**
|
||||||
- **vice:article**
|
- **vice:article**
|
||||||
@ -1080,6 +1088,7 @@
|
|||||||
- **wrzuta.pl:playlist**
|
- **wrzuta.pl:playlist**
|
||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **WSJArticle**
|
- **WSJArticle**
|
||||||
|
- **WWE**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||||
@ -1139,3 +1148,4 @@
|
|||||||
- **ZDF**
|
- **ZDF**
|
||||||
- **ZDFChannel**
|
- **ZDFChannel**
|
||||||
- **zingmp3**: mp3.zing.vn
|
- **zingmp3**: mp3.zing.vn
|
||||||
|
- **Zype**
|
||||||
|
9
setup.py
9
setup.py
@ -124,6 +124,8 @@ setup(
|
|||||||
'Development Status :: 5 - Production/Stable',
|
'Development Status :: 5 - Production/Stable',
|
||||||
'Environment :: Console',
|
'Environment :: Console',
|
||||||
'License :: Public Domain',
|
'License :: Public Domain',
|
||||||
|
'Programming Language :: Python',
|
||||||
|
'Programming Language :: Python :: 2',
|
||||||
'Programming Language :: Python :: 2.6',
|
'Programming Language :: Python :: 2.6',
|
||||||
'Programming Language :: Python :: 2.7',
|
'Programming Language :: Python :: 2.7',
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
@ -132,6 +134,13 @@ setup(
|
|||||||
'Programming Language :: Python :: 3.4',
|
'Programming Language :: Python :: 3.4',
|
||||||
'Programming Language :: Python :: 3.5',
|
'Programming Language :: Python :: 3.5',
|
||||||
'Programming Language :: Python :: 3.6',
|
'Programming Language :: Python :: 3.6',
|
||||||
|
'Programming Language :: Python :: 3.7',
|
||||||
|
'Programming Language :: Python :: 3.8',
|
||||||
|
'Programming Language :: Python :: Implementation',
|
||||||
|
'Programming Language :: Python :: Implementation :: CPython',
|
||||||
|
'Programming Language :: Python :: Implementation :: IronPython',
|
||||||
|
'Programming Language :: Python :: Implementation :: Jython',
|
||||||
|
'Programming Language :: Python :: Implementation :: PyPy',
|
||||||
],
|
],
|
||||||
|
|
||||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||||
|
34
test/test_YoutubeDLCookieJar.py
Normal file
34
test/test_YoutubeDLCookieJar.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.utils import YoutubeDLCookieJar
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
|
def test_keep_session_cookies(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
try:
|
||||||
|
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
|
||||||
|
temp = tf.read().decode('utf-8')
|
||||||
|
self.assertTrue(re.search(
|
||||||
|
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
|
||||||
|
self.assertTrue(re.search(
|
||||||
|
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
|
||||||
|
finally:
|
||||||
|
tf.close()
|
||||||
|
os.remove(tf.name)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
|
|||||||
|
|
||||||
def test_compat_expanduser(self):
|
def test_compat_expanduser(self):
|
||||||
old_home = os.environ.get('HOME')
|
old_home = os.environ.get('HOME')
|
||||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
test_str = r'C:\Documents and Settings\тест\Application Data'
|
||||||
compat_setenv('HOME', test_str)
|
compat_setenv('HOME', test_str)
|
||||||
self.assertEqual(compat_expanduser('~'), test_str)
|
self.assertEqual(compat_expanduser('~'), test_str)
|
||||||
compat_setenv('HOME', old_home or '')
|
compat_setenv('HOME', old_home or '')
|
||||||
|
@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
|
|||||||
class TestMetadataFromTitle(unittest.TestCase):
|
class TestMetadataFromTitle(unittest.TestCase):
|
||||||
def test_format_to_regex(self):
|
def test_format_to_regex(self):
|
||||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||||
|
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
||||||
|
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
@ -88,6 +88,7 @@ from .utils import (
|
|||||||
version_tuple,
|
version_tuple,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
|
YoutubeDLCookieJar,
|
||||||
YoutubeDLCookieProcessor,
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
)
|
)
|
||||||
@ -558,7 +559,7 @@ class YoutubeDL(object):
|
|||||||
self.restore_console_title()
|
self.restore_console_title()
|
||||||
|
|
||||||
if self.params.get('cookiefile') is not None:
|
if self.params.get('cookiefile') is not None:
|
||||||
self.cookiejar.save()
|
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, message=None, tb=None):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
@ -2297,10 +2298,9 @@ class YoutubeDL(object):
|
|||||||
self.cookiejar = compat_cookiejar.CookieJar()
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
else:
|
else:
|
||||||
opts_cookiefile = expand_path(opts_cookiefile)
|
opts_cookiefile = expand_path(opts_cookiefile)
|
||||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||||
opts_cookiefile)
|
|
||||||
if os.access(opts_cookiefile, os.R_OK):
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
self.cookiejar.load()
|
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||||
if opts_proxy is not None:
|
if opts_proxy is not None:
|
||||||
|
@ -17,25 +17,15 @@ from ..utils import (
|
|||||||
|
|
||||||
class ACastIE(InfoExtractor):
|
class ACastIE(InfoExtractor):
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:(?:embed|www)\.)?acast\.com/|
|
||||||
|
play\.acast\.com/s/
|
||||||
|
)
|
||||||
|
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# test with one bling
|
|
||||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
|
||||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
|
||||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
|
||||||
'timestamp': 1196172000,
|
|
||||||
'upload_date': '20071127',
|
|
||||||
'duration': 211,
|
|
||||||
'creator': 'Concierge',
|
|
||||||
'series': 'Condé Nast Traveler Podcast',
|
|
||||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# test with multiple blings
|
|
||||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
|
|||||||
'series': 'Spår',
|
'series': 'Spår',
|
||||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
|||||||
|
|
||||||
class AENetworksIE(AENetworksBaseIE):
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?P<domain>
|
(?P<domain>
|
||||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||||
fyi\.tv
|
fyi\.tv
|
||||||
)/
|
)/
|
||||||
(?:
|
(?:
|
||||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||||
specials/(?P<special_display_id>[^/]+)/full-special
|
specials/(?P<special_display_id>[^/]+)/full-special|
|
||||||
|
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_DOMAIN_TO_REQUESTOR_ID = {
|
_DOMAIN_TO_REQUESTOR_ID = {
|
||||||
'history.com': 'HISTORY',
|
'history.com': 'HISTORY',
|
||||||
@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||||
display_id = show_path or movie_display_id or special_display_id
|
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||||
if show_path:
|
if show_path:
|
||||||
url_parts = show_path.split('/')
|
url_parts = show_path.split('/')
|
||||||
url_parts_len = len(url_parts)
|
url_parts_len = len(url_parts)
|
||||||
|
@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
|
||||||
webpage, 'kaltura partner id')
|
|
||||||
|
|
||||||
video_data = self._parse_json(
|
video_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||||
@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
(lambda x: x['episodeDetail']['content']['data'],
|
(lambda x: x['episodeDetail']['content']['data'],
|
||||||
lambda x: x['videoDetail']['content']['data']), dict)
|
lambda x: x['videoDetail']['content']['data']), dict)
|
||||||
ep_meta = ep_data.get('full_video', {})
|
ep_meta = ep_data.get('full_video', {})
|
||||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
|
||||||
|
zype_id = ep_meta.get('zype_id')
|
||||||
|
if zype_id:
|
||||||
|
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||||
|
ie_key = 'Zype'
|
||||||
|
else:
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||||
|
webpage, 'kaltura partner id')
|
||||||
|
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||||
|
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||||
|
ie_key = 'Kaltura'
|
||||||
|
|
||||||
title = ep_data.get('title') or ep_meta.get('title')
|
title = ep_data.get('title') or ep_meta.get('title')
|
||||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||||
@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
'url': embed_url,
|
||||||
'ie_key': 'Kaltura',
|
'ie_key': ie_key,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -8,20 +8,23 @@ from .generic import GenericIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
qualities,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
qualities,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
|
|
||||||
class ARDMediathekIE(InfoExtractor):
|
class ARDMediathekIE(InfoExtractor):
|
||||||
IE_NAME = 'ARD:mediathek'
|
IE_NAME = 'ARD:mediathek'
|
||||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 26.07.2022
|
# available till 26.07.2022
|
||||||
@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
# audio
|
# audio
|
||||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||||
|
|
||||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
media_info = self._download_json(
|
media_info = self._download_json(
|
||||||
media_info_url, video_id, 'Downloading media JSON')
|
media_info_url, video_id, 'Downloading media JSON')
|
||||||
@ -173,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||||
r'<h4 class="headline">(.*?)</h4>'],
|
r'<h4 class="headline">(.*?)</h4>',
|
||||||
|
r'<title[^>]*>(.*?)</title>'],
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'dcterms.abstract', webpage, 'description', default=None)
|
'dcterms.abstract', webpage, 'description', default=None)
|
||||||
if description is None:
|
if description is None:
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'description', webpage, 'meta description')
|
'description', webpage, 'meta description', default=None)
|
||||||
|
if description is None:
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<p\s+class="teasertext">(.+?)</p>',
|
||||||
|
webpage, 'teaser text', default=None)
|
||||||
|
|
||||||
# Thumbnail is sometimes not present.
|
# Thumbnail is sometimes not present.
|
||||||
# It is in the mobile version, but that seems to use a different URL
|
# It is in the mobile version, but that seems to use a different URL
|
||||||
@ -288,7 +303,7 @@ class ARDIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class ARDBetaMediathekIE(InfoExtractor):
|
class ARDBetaMediathekIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||||
@ -302,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||||||
'upload_date': '20180826',
|
'upload_date': '20180826',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||||
@ -318,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
}
|
}
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
geoblocked = False
|
||||||
for widget in data.values():
|
for widget in data.values():
|
||||||
if widget.get('_geoblocked'):
|
if widget.get('_geoblocked') is True:
|
||||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
geoblocked = True
|
||||||
|
|
||||||
if '_duration' in widget:
|
if '_duration' in widget:
|
||||||
res['duration'] = widget['_duration']
|
res['duration'] = int_or_none(widget['_duration'])
|
||||||
if 'clipTitle' in widget:
|
if 'clipTitle' in widget:
|
||||||
res['title'] = widget['clipTitle']
|
res['title'] = widget['clipTitle']
|
||||||
if '_previewImage' in widget:
|
if '_previewImage' in widget:
|
||||||
res['thumbnail'] = widget['_previewImage']
|
res['thumbnail'] = widget['_previewImage']
|
||||||
if 'broadcastedOn' in widget:
|
if 'broadcastedOn' in widget:
|
||||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||||
if 'synopsis' in widget:
|
if 'synopsis' in widget:
|
||||||
res['description'] = widget['synopsis']
|
res['description'] = widget['synopsis']
|
||||||
if '_subtitleUrl' in widget:
|
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||||
res['subtitles'] = {'de': [{
|
if subtitle_url:
|
||||||
|
subtitles.setdefault('de', []).append({
|
||||||
'ext': 'ttml',
|
'ext': 'ttml',
|
||||||
'url': widget['_subtitleUrl'],
|
'url': subtitle_url,
|
||||||
}]}
|
})
|
||||||
if '_quality' in widget:
|
if '_quality' in widget:
|
||||||
format_url = widget['_stream']['json'][0]
|
format_url = url_or_none(try_get(
|
||||||
|
widget, lambda x: x['_stream']['json'][0]))
|
||||||
if format_url.endswith('.f4m'):
|
if not format_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
format_url + '?hdcore=3.11.0',
|
format_url + '?hdcore=3.11.0',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif format_url.endswith('m3u8'):
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
else:
|
else:
|
||||||
|
# HTTP formats are not available when geoblocked is True,
|
||||||
|
# other formats are fine though
|
||||||
|
if geoblocked:
|
||||||
|
continue
|
||||||
|
quality = str_or_none(widget.get('_quality'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'http-' + widget['_quality'],
|
'format_id': ('http-' + quality) if quality else 'http',
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'preference': 10, # Plain HTTP, that's nice
|
'preference': 10, # Plain HTTP, that's nice
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if not formats and geoblocked:
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg='This video is not available due to geoblocking',
|
||||||
|
countries=['DE'])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
res['formats'] = formats
|
res.update({
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
|||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||||
webpage, 'player data')), display_id)['config']['initial_video']
|
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||||
|
webpage, 'player data', group='json')),
|
||||||
|
display_id)['config']['initial_video']
|
||||||
|
|
||||||
video_id = video_data['id']
|
video_id = video_data['id']
|
||||||
video_title = video_data['title']
|
video_title = video_data['title']
|
||||||
|
@ -36,7 +36,6 @@ class AZMedienIE(InfoExtractor):
|
|||||||
'id': '1_anruz3wy',
|
'id': '1_anruz3wy',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||||
'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
|
|
||||||
'uploader_id': 'TVOnline',
|
'uploader_id': 'TVOnline',
|
||||||
'upload_date': '20180930',
|
'upload_date': '20180930',
|
||||||
'timestamp': 1538328802,
|
'timestamp': 1538328802,
|
||||||
@ -53,15 +52,12 @@ class AZMedienIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
entry_id = mobj.group('kaltura_id')
|
entry_id = mobj.group('kaltura_id')
|
||||||
|
|
||||||
if not entry_id:
|
if not entry_id:
|
||||||
webpage = self._download_webpage(url, video_id)
|
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||||
api_path = self._search_regex(
|
|
||||||
r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
|
|
||||||
webpage, 'api path')
|
|
||||||
api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
|
|
||||||
payload = {
|
payload = {
|
||||||
'query': '''query VideoContext($articleId: ID!) {
|
'query': '''query VideoContext($articleId: ID!) {
|
||||||
article: node(id: $articleId) {
|
article: node(id: $articleId) {
|
||||||
|
@ -37,7 +37,7 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._html_search_regex(
|
||||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||||
webpage, 'title', default=None) or self._html_search_meta(
|
webpage, 'title', default=None) or self._html_search_meta(
|
||||||
'description', webpage, 'title',
|
'description', webpage, 'title',
|
||||||
|
142
youtube_dl/extractor/ciscolive.py
Normal file
142
youtube_dl/extractor/ciscolive.py
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveBaseIE(InfoExtractor):
    """Common base for the Cisco Live extractors.

    Holds the RainFocus API constants, the request helper and the
    conversion of one API item into a url_transparent result that is
    delegated to the BrightcoveNew extractor.
    """

    # These appear to be constant across all Cisco Live presentations
    # and are not tied to any user session or event
    RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
    RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
    RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'

    # Headers sent with every API request; 'Referer' is added per call
    HEADERS = {
        'Origin': 'https://ciscolive.cisco.com',
        'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
        'rfWidgetId': RAINFOCUS_WIDGET_ID,
    }

    def _call_api(self, ep, rf_id, query, referrer, note=None):
        """POST `query` (form-encoded) to API endpoint `ep` and return the JSON.

        `rf_id` is passed to _download_json as the video id, `referrer`
        becomes the Referer header and `note` is the optional progress note.
        """
        request_headers = dict(self.HEADERS)
        request_headers['Referer'] = referrer
        endpoint_url = self.RAINFOCUS_API_URL % ep
        post_body = urlencode_postdata(query)
        return self._download_json(
            endpoint_url, rf_id, note=note,
            data=post_body, headers=request_headers)

    def _parse_rf_item(self, rf_item):
        """Build a url_transparent info dict from a single API item.

        'title' and 'videos'[0]['url'] are mandatory (a missing one raises);
        every other field is optional and may end up as None.
        """
        series = rf_item.get('eventName')
        # Mandatory: looked up before the video id, as failures here
        # must surface first
        video_title = rf_item['title']
        summary = clean_html(rf_item.get('abstract'))
        creator = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % rf_item['videos'][0]['url']
        length = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
        room = try_get(rf_item, lambda x: x['times'][0]['room'])

        # The reported length is multiplied by 60 (presumably minutes ->
        # seconds; TODO confirm against the API); None and 0 pass through
        length_scaled = length * 60 if length else length

        info = {
            '_type': 'url_transparent',
            'url': brightcove_url,
            'ie_key': 'BrightcoveNew',
            'title': video_title,
            'description': summary,
            'duration': length_scaled,
            'creator': creator,
            'location': room,
            'series': series,
        }
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveSessionIE(CiscoLiveBaseIE):
    """Extractor for a single on-demand library session (#/session/<id> URLs)."""

    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
    _TEST = {
        'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
        'md5': 'c98acf395ed9c9f766941c70f5352e22',
        'info_dict': {
            'id': '5803694304001',
            'ext': 'mp4',
            'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
            'description': 'md5:ec4a436019e09a918dec17714803f7cc',
            'timestamp': 1530305395,
            'upload_date': '20180629',
            'uploader_id': '5647924234001',
            'location': '16B Mezz.',
        },
    }

    def _real_extract(self, url):
        """Query the 'session' endpoint for the id in `url` and parse its first item."""
        session_id = self._match_id(url)
        response = self._call_api(
            'session', session_id, {'id': session_id}, url)
        # Only the first returned item is used
        first_item = response['items'][0]
        return self._parse_rf_item(first_item)
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveSearchIE(CiscoLiveBaseIE):
    """Playlist extractor for on-demand library URLs carrying search filters.

    The URL's query string is forwarded to the 'search' API endpoint and
    every returned item that has a usable video id becomes a playlist entry.
    """

    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
    _TESTS = [{
        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
        'info_dict': {
            'title': 'Search query',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that CiscoLiveSessionIE accepts; otherwise use the default check.

        _VALID_URL here is a prefix of the session URL pattern, so without
        this override both extractors would match session URLs.
        """
        return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)

    @staticmethod
    def _check_bc_id_exists(rf_item):
        """Return True if rf_item['videos'][0]['url'] exists and int_or_none() accepts it."""
        return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None

    def _entries(self, query, url):
        """Yield one parsed entry per usable item, paging through the search API.

        `query` holds the search parameters; `url` is sent as the Referer.
        Paging stops on a page without a non-empty 'items' list or once the
        reported 'total' has been reached.
        """
        # Work on a copy so the paging keys do not leak into the caller's dict
        query = dict(query)
        query['size'] = 50
        query['from'] = 0
        for page_num in itertools.count(1):
            results = self._call_api(
                'search', None, query, url,
                'Downloading search JSON page %d' % page_num)
            # When present, the first 'sectionList' element is used in place
            # of the top-level response for items/size/total
            sl = try_get(results, lambda x: x['sectionList'][0], dict)
            if sl:
                results = sl
            items = results.get('items')
            if not items or not isinstance(items, list):
                break
            for item in items:
                if not isinstance(item, dict):
                    continue
                # Items without a numeric video id cannot be resolved
                if not self._check_bc_id_exists(item):
                    continue
                yield self._parse_rf_item(item)
            # Adopt the page size reported by the API, if any
            size = int_or_none(results.get('size'))
            if size is not None:
                query['size'] = size
            total = int_or_none(results.get('total'))
            # '>=' rather than '>': when the offset after this page equals
            # the total, everything has been fetched and another request
            # would only return an empty page
            if total is not None and query['from'] + query['size'] >= total:
                break
            query['from'] += query['size']

    def _real_extract(self, url):
        """Turn the URL's query string into a 'session' search and return it as a playlist."""
        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query['type'] = 'session'
        return self.playlist_result(
            self._entries(query, url), playlist_title='Search query')
|
@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
|
|||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
_VALID_URL = r'''(?x)https?://
|
||||||
discovery|
|
(?P<site>
|
||||||
investigationdiscovery|
|
(?:www\.)?
|
||||||
discoverylife|
|
(?:
|
||||||
animalplanet|
|
discovery|
|
||||||
ahctv|
|
investigationdiscovery|
|
||||||
destinationamerica|
|
discoverylife|
|
||||||
sciencechannel|
|
animalplanet|
|
||||||
tlc|
|
ahctv|
|
||||||
velocity
|
destinationamerica|
|
||||||
|
sciencechannel|
|
||||||
|
tlc|
|
||||||
|
velocity
|
||||||
|
)|
|
||||||
|
watch\.
|
||||||
|
(?:
|
||||||
|
hgtv|
|
||||||
|
foodnetwork|
|
||||||
|
travelchannel|
|
||||||
|
diynetwork|
|
||||||
|
cookingchanneltv|
|
||||||
|
motortrend
|
||||||
|
)
|
||||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||||
@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
|
|
||||||
if not access_token:
|
if not access_token:
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
'https://%s.com/anonymous' % site, display_id, query={
|
||||||
'authRel': 'authorization',
|
'authRel': 'authorization',
|
||||||
'client_id': try_get(
|
'client_id': try_get(
|
||||||
react_data, lambda x: x['application']['apiClientId'],
|
react_data, lambda x: x['application']['apiClientId'],
|
||||||
|
@ -194,6 +194,10 @@ from .chirbit import (
|
|||||||
ChirbitProfileIE,
|
ChirbitProfileIE,
|
||||||
)
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
|
from .ciscolive import (
|
||||||
|
CiscoLiveSessionIE,
|
||||||
|
CiscoLiveSearchIE,
|
||||||
|
)
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clippit import ClippitIE
|
from .clippit import ClippitIE
|
||||||
@ -479,6 +483,7 @@ from .imdb import (
|
|||||||
from .imgur import (
|
from .imgur import (
|
||||||
ImgurIE,
|
ImgurIE,
|
||||||
ImgurAlbumIE,
|
ImgurAlbumIE,
|
||||||
|
ImgurGalleryIE,
|
||||||
)
|
)
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .inc import IncIE
|
from .inc import IncIE
|
||||||
@ -549,6 +554,11 @@ from .lcp import (
|
|||||||
)
|
)
|
||||||
from .learnr import LearnrIE
|
from .learnr import LearnrIE
|
||||||
from .lecture2go import Lecture2GoIE
|
from .lecture2go import Lecture2GoIE
|
||||||
|
from .lecturio import (
|
||||||
|
LecturioIE,
|
||||||
|
LecturioCourseIE,
|
||||||
|
LecturioDeCourseIE,
|
||||||
|
)
|
||||||
from .leeco import (
|
from .leeco import (
|
||||||
LeIE,
|
LeIE,
|
||||||
LePlaylistIE,
|
LePlaylistIE,
|
||||||
@ -1082,6 +1092,10 @@ from .tass import TassIE
|
|||||||
from .tastytrade import TastyTradeIE
|
from .tastytrade import TastyTradeIE
|
||||||
from .tbs import TBSIE
|
from .tbs import TBSIE
|
||||||
from .tdslifeway import TDSLifewayIE
|
from .tdslifeway import TDSLifewayIE
|
||||||
|
from .teachable import (
|
||||||
|
TeachableIE,
|
||||||
|
TeachableCourseIE,
|
||||||
|
)
|
||||||
from .teachertube import (
|
from .teachertube import (
|
||||||
TeacherTubeIE,
|
TeacherTubeIE,
|
||||||
TeacherTubeUserIE,
|
TeacherTubeUserIE,
|
||||||
@ -1120,6 +1134,10 @@ from .thisamericanlife import ThisAmericanLifeIE
|
|||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .thisoldhouse import ThisOldHouseIE
|
from .thisoldhouse import ThisOldHouseIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
|
from .tiktok import (
|
||||||
|
TikTokIE,
|
||||||
|
TikTokUserIE,
|
||||||
|
)
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tmz import (
|
from .tmz import (
|
||||||
TMZIE,
|
TMZIE,
|
||||||
@ -1227,10 +1245,6 @@ from .uplynk import (
|
|||||||
UplynkIE,
|
UplynkIE,
|
||||||
UplynkPreplayIE,
|
UplynkPreplayIE,
|
||||||
)
|
)
|
||||||
from .upskill import (
|
|
||||||
UpskillIE,
|
|
||||||
UpskillCourseIE,
|
|
||||||
)
|
|
||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
from .urplay import URPlayIE
|
from .urplay import URPlayIE
|
||||||
from .usanetwork import USANetworkIE
|
from .usanetwork import USANetworkIE
|
||||||
@ -1299,6 +1313,7 @@ from .vimeo import (
|
|||||||
VimeoReviewIE,
|
VimeoReviewIE,
|
||||||
VimeoUserIE,
|
VimeoUserIE,
|
||||||
VimeoWatchLaterIE,
|
VimeoWatchLaterIE,
|
||||||
|
VHXEmbedIE,
|
||||||
)
|
)
|
||||||
from .vimple import VimpleIE
|
from .vimple import VimpleIE
|
||||||
from .vine import (
|
from .vine import (
|
||||||
@ -1386,6 +1401,7 @@ from .wsj import (
|
|||||||
WSJIE,
|
WSJIE,
|
||||||
WSJArticleIE,
|
WSJArticleIE,
|
||||||
)
|
)
|
||||||
|
from .wwe import WWEIE
|
||||||
from .xbef import XBefIE
|
from .xbef import XBefIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xfileshare import XFileShareIE
|
from .xfileshare import XFileShareIE
|
||||||
@ -1478,3 +1494,4 @@ from .zattoo import (
|
|||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import ZingMp3IE
|
||||||
|
from .zype import ZypeIE
|
||||||
|
@ -1,43 +1,33 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FoxSportsIE(InfoExtractor):
|
class FoxSportsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bwduI3X_TgUB',
|
'id': '432609859715',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||||
'upload_date': '20150423',
|
# TODO: fix timestamp
|
||||||
'timestamp': 1429761109,
|
'upload_date': '19700101', # '20150423',
|
||||||
|
# 'timestamp': 1429761109,
|
||||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
return self.url_result(
|
||||||
|
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
|
||||||
config = self._parse_json(
|
|
||||||
self._html_search_regex(
|
|
||||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
|
||||||
webpage, 'data player config'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
return self.url_result(smuggle_url(update_url_query(
|
|
||||||
config['releaseURL'], {
|
|
||||||
'mbr': 'true',
|
|
||||||
'switch': 'http',
|
|
||||||
}), {'force_smil_url': True}))
|
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GameSpotIE(OnceIE):
|
class GameSpotIE(OnceIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||||
@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -109,11 +109,13 @@ from .vice import ViceIE
|
|||||||
from .xfileshare import XFileShareIE
|
from .xfileshare import XFileShareIE
|
||||||
from .cloudflarestream import CloudflareStreamIE
|
from .cloudflarestream import CloudflareStreamIE
|
||||||
from .peertube import PeerTubeIE
|
from .peertube import PeerTubeIE
|
||||||
|
from .teachable import TeachableIE
|
||||||
from .indavideo import IndavideoEmbedIE
|
from .indavideo import IndavideoEmbedIE
|
||||||
from .apa import APAIE
|
from .apa import APAIE
|
||||||
from .foxnews import FoxNewsIE
|
from .foxnews import FoxNewsIE
|
||||||
from .viqeo import ViqeoIE
|
from .viqeo import ViqeoIE
|
||||||
from .expressen import ExpressenIE
|
from .expressen import ExpressenIE
|
||||||
|
from .zype import ZypeIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -2070,6 +2072,20 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Zype embed
|
||||||
|
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5b400b834b32992a310622b9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Smoky Barbecue Favorites',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
|
},
|
||||||
|
'add_ie': [ZypeIE.ie_key()],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# videojs embed
|
# videojs embed
|
||||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||||
@ -3097,6 +3113,10 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||||
|
|
||||||
|
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||||
|
if teachable_url:
|
||||||
|
return self.url_result(teachable_url)
|
||||||
|
|
||||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||||
if indavideo_urls:
|
if indavideo_urls:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
@ -3129,6 +3149,11 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||||
|
|
||||||
|
zype_urls = ZypeIE._extract_urls(webpage)
|
||||||
|
if zype_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
|
@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
gfy = self._download_json(
|
gfy = self._download_json(
|
||||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
|
||||||
video_id, 'Downloading video info')
|
video_id, 'Downloading video info')
|
||||||
if 'error' in gfy:
|
if 'error' in gfy:
|
||||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||||
|
@ -43,6 +43,7 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
IE_NAME = 'hotstar'
|
IE_NAME = 'hotstar'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# contentData
|
||||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000076273',
|
'id': '1000076273',
|
||||||
@ -57,6 +58,10 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# contentDetail
|
||||||
|
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -74,10 +79,15 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||||
webpage, 'app state'), video_id)
|
webpage, 'app state'), video_id)
|
||||||
video_data = {}
|
video_data = {}
|
||||||
|
getters = list(
|
||||||
|
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||||
|
for k in ('Data', 'Detail')
|
||||||
|
)
|
||||||
for v in app_state.values():
|
for v in app_state.values():
|
||||||
content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
|
content = try_get(v, getters, dict)
|
||||||
if content and content.get('contentId') == video_id:
|
if content and content.get('contentId') == video_id:
|
||||||
video_data = content
|
video_data = content
|
||||||
|
break
|
||||||
|
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ImgurIE(InfoExtractor):
|
class ImgurIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
@ -20,28 +20,9 @@ class ImgurIE(InfoExtractor):
|
|||||||
'id': 'A61SaA1',
|
'id': 'A61SaA1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
'description': 'Imgur: The magic of the Internet',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/A61SaA1',
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
'info_dict': {
|
|
||||||
'id': 'A61SaA1',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
|
||||||
'description': 'Imgur: The magic of the Internet',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'YcAQlkx',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||||
@ -50,8 +31,8 @@ class ImgurIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
|
webpage = self._download_webpage(
|
||||||
webpage = self._download_webpage(gifv_url, video_id)
|
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
|
||||||
|
|
||||||
width = int_or_none(self._og_search_property(
|
width = int_or_none(self._og_search_property(
|
||||||
'video:width', webpage, default=None))
|
'video:width', webpage, default=None))
|
||||||
@ -72,7 +53,6 @@ class ImgurIE(InfoExtractor):
|
|||||||
'format_id': m.group('type').partition('/')[2],
|
'format_id': m.group('type').partition('/')[2],
|
||||||
'url': self._proto_relative_url(m.group('src')),
|
'url': self._proto_relative_url(m.group('src')),
|
||||||
'ext': mimetype2ext(m.group('type')),
|
'ext': mimetype2ext(m.group('type')),
|
||||||
'acodec': 'none',
|
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
@ -107,44 +87,64 @@ class ImgurIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._og_search_description(webpage, default=None),
|
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ImgurAlbumIE(InfoExtractor):
|
class ImgurGalleryIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
|
IE_NAME = 'imgur:gallery'
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://imgur.com/gallery/Q95ko',
|
'url': 'http://imgur.com/gallery/Q95ko',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Q95ko',
|
'id': 'Q95ko',
|
||||||
|
'title': 'Adding faces make every GIF better',
|
||||||
},
|
},
|
||||||
'playlist_count': 25,
|
'playlist_count': 25,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://imgur.com/a/j6Orj',
|
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'YcAQlkx',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
gallery_id = self._match_id(url)
|
||||||
|
|
||||||
album_images = self._download_json(
|
data = self._download_json(
|
||||||
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
|
'https://imgur.com/gallery/%s.json' % gallery_id,
|
||||||
album_id, fatal=False)
|
gallery_id)['data']['image']
|
||||||
|
|
||||||
if album_images:
|
if data.get('is_album'):
|
||||||
data = album_images.get('data')
|
entries = [
|
||||||
if data and isinstance(data, dict):
|
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
|
||||||
images = data.get('images')
|
for image in data['album_images']['images'] if image.get('hash')]
|
||||||
if images and isinstance(images, list):
|
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
|
||||||
entries = [
|
|
||||||
self.url_result('http://imgur.com/%s' % image['hash'])
|
|
||||||
for image in images if image.get('hash')]
|
|
||||||
return self.playlist_result(entries, album_id)
|
|
||||||
|
|
||||||
# Fallback to single video
|
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
|
||||||
return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
|
|
||||||
|
|
||||||
|
class ImgurAlbumIE(ImgurGalleryIE):
|
||||||
|
IE_NAME = 'imgur:album'
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://imgur.com/a/j6Orj',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'j6Orj',
|
||||||
|
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
|
||||||
|
},
|
||||||
|
'playlist_count': 12,
|
||||||
|
}]
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -41,6 +41,24 @@ class IPrimaIE(InfoExtractor):
|
|||||||
# iframe prima.iprima.cz
|
# iframe prima.iprima.cz
|
||||||
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
|
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.iprima.cz/filmy/desne-rande',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
|
|||||||
|
|
||||||
bitrates = self._parse_json(
|
bitrates = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
|
||||||
default='{}'),
|
default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
|
@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
|
|||||||
'entryId': video_id,
|
'entryId': video_id,
|
||||||
'service': 'baseentry',
|
'service': 'baseentry',
|
||||||
'ks': '{1:result:ks}',
|
'ks': '{1:result:ks}',
|
||||||
|
'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
|
||||||
|
'responseProfile:type': 1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'action': 'getbyentryid',
|
'action': 'getbyentryid',
|
||||||
|
229
youtube_dl/extractor/lecturio.py
Normal file
229
youtube_dl/extractor/lecturio.py
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LecturioBaseIE(InfoExtractor):
|
||||||
|
_LOGIN_URL = 'https://app.lecturio.com/en/login'
|
||||||
|
_NETRC_MACHINE = 'lecturio'
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Sets some cookies
|
||||||
|
_, urlh = self._download_webpage_handle(
|
||||||
|
self._LOGIN_URL, None, 'Downloading login popup')
|
||||||
|
|
||||||
|
def is_logged(url_handle):
|
||||||
|
return self._LOGIN_URL not in compat_str(url_handle.geturl())
|
||||||
|
|
||||||
|
# Already logged in
|
||||||
|
if is_logged(urlh):
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form = {
|
||||||
|
'signin[email]': username,
|
||||||
|
'signin[password]': password,
|
||||||
|
'signin[remember]': 'on',
|
||||||
|
}
|
||||||
|
|
||||||
|
response, urlh = self._download_webpage_handle(
|
||||||
|
self._LOGIN_URL, None, 'Logging in',
|
||||||
|
data=urlencode_postdata(login_form))
|
||||||
|
|
||||||
|
# Logged in successfully
|
||||||
|
if is_logged(urlh):
|
||||||
|
return
|
||||||
|
|
||||||
|
errors = self._html_search_regex(
|
||||||
|
r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
|
||||||
|
'errors', default=None)
|
||||||
|
if errors:
|
||||||
|
raise ExtractorError('Unable to login: %s' % errors, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
|
class LecturioIE(LecturioBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https://
|
||||||
|
(?:
|
||||||
|
app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
|
||||||
|
(?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
||||||
|
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '39634',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
||||||
|
},
|
||||||
|
'skip': 'Requires lecturio account credentials',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_CC_LANGS = {
|
||||||
|
'German': 'de',
|
||||||
|
'English': 'en',
|
||||||
|
'Spanish': 'es',
|
||||||
|
'French': 'fr',
|
||||||
|
'Polish': 'pl',
|
||||||
|
'Russian': 'ru',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id') or mobj.group('id_de')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
lecture_id = self._search_regex(
|
||||||
|
r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')
|
||||||
|
|
||||||
|
api_url = self._search_regex(
|
||||||
|
r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'api url', group='url')
|
||||||
|
|
||||||
|
video = self._download_json(api_url, display_id)
|
||||||
|
|
||||||
|
title = video['title'].strip()
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_ in video['content']['media']:
|
||||||
|
if not isinstance(format_, dict):
|
||||||
|
continue
|
||||||
|
file_ = format_.get('file')
|
||||||
|
if not file_:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(file_)
|
||||||
|
if ext == 'smil':
|
||||||
|
# smil contains only broken RTMP formats anyway
|
||||||
|
continue
|
||||||
|
file_url = url_or_none(file_)
|
||||||
|
if not file_url:
|
||||||
|
continue
|
||||||
|
label = str_or_none(format_.get('label'))
|
||||||
|
filesize = int_or_none(format_.get('fileSize'))
|
||||||
|
formats.append({
|
||||||
|
'url': file_url,
|
||||||
|
'format_id': label,
|
||||||
|
'filesize': float_or_none(filesize, invscale=1000)
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
automatic_captions = {}
|
||||||
|
cc = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
|
||||||
|
default='{}'), display_id, fatal=False)
|
||||||
|
for cc_label, cc_url in cc.items():
|
||||||
|
cc_url = url_or_none(cc_url)
|
||||||
|
if not cc_url:
|
||||||
|
continue
|
||||||
|
lang = self._search_regex(
|
||||||
|
r'/([a-z]{2})_', cc_url, 'lang',
|
||||||
|
default=cc_label.split()[0] if cc_label else 'en')
|
||||||
|
original_lang = self._search_regex(
|
||||||
|
r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
|
||||||
|
default=None)
|
||||||
|
sub_dict = (automatic_captions
|
||||||
|
if 'auto-translated' in cc_label or original_lang
|
||||||
|
else subtitles)
|
||||||
|
sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
|
||||||
|
'url': cc_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': lecture_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'automatic_captions': automatic_captions,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LecturioCourseIE(LecturioBaseIE):
|
||||||
|
_VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'microbiology-introduction',
|
||||||
|
'title': 'Microbiology: Introduction',
|
||||||
|
},
|
||||||
|
'playlist_count': 45,
|
||||||
|
'skip': 'Requires lecturio account credentials',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
|
||||||
|
webpage):
|
||||||
|
params = extract_attributes(mobj.group(0))
|
||||||
|
lecture_url = urljoin(url, params.get('data-url'))
|
||||||
|
lecture_id = params.get('data-id')
|
||||||
|
entries.append(self.url_result(
|
||||||
|
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
|
||||||
|
'title', default=None)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, display_id, title)
|
||||||
|
|
||||||
|
|
||||||
|
class LecturioDeCourseIE(LecturioBaseIE):
|
||||||
|
_VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
|
||||||
|
webpage):
|
||||||
|
lecture_url = urljoin(url, mobj.group('url'))
|
||||||
|
lecture_id = mobj.group('id')
|
||||||
|
entries.append(self.url_result(
|
||||||
|
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, display_id, title)
|
@ -16,16 +16,15 @@ from ..utils import (
|
|||||||
class LibraryOfCongressIE(InfoExtractor):
|
class LibraryOfCongressIE(InfoExtractor):
|
||||||
IE_NAME = 'loc'
|
IE_NAME = 'loc'
|
||||||
IE_DESC = 'Library of Congress'
|
IE_DESC = 'Library of Congress'
|
||||||
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# embedded via <div class="media-player"
|
# embedded via <div class="media-player"
|
||||||
'url': 'http://loc.gov/item/90716351/',
|
'url': 'http://loc.gov/item/90716351/',
|
||||||
'md5': '353917ff7f0255aa6d4b80a034833de8',
|
'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '90716351',
|
'id': '90716351',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Pa's trip to Mars",
|
'title': "Pa's trip to Mars",
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 0,
|
'duration': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
@ -57,6 +56,12 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.loc.gov/item/ihas.200197114/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -67,12 +72,13 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
||||||
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
|
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
|
||||||
|
r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
|
||||||
webpage, 'media id', group='id')
|
webpage, 'media id', group='id')
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
||||||
video_id)['mediaObject']
|
media_id)['mediaObject']
|
||||||
|
|
||||||
derivative = data['derivatives'][0]
|
derivative = data['derivatives'][0]
|
||||||
media_url = derivative['derivativeUrl']
|
media_url = derivative['derivativeUrl']
|
||||||
@ -89,25 +95,29 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
if ext not in ('mp4', 'mp3'):
|
if ext not in ('mp4', 'mp3'):
|
||||||
media_url += '.mp4' if is_video else '.mp3'
|
media_url += '.mp4' if is_video else '.mp3'
|
||||||
|
|
||||||
if 'vod/mp4:' in media_url:
|
formats = []
|
||||||
formats = [{
|
if '/vod/mp4:' in media_url:
|
||||||
'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
|
formats.append({
|
||||||
|
'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
|
||||||
'format_id': 'hls',
|
'format_id': 'hls',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'protocol': 'm3u8_native',
|
'protocol': 'm3u8_native',
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
}]
|
})
|
||||||
elif 'vod/mp3:' in media_url:
|
http_format = {
|
||||||
formats = [{
|
'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
|
||||||
'url': media_url.replace('vod/mp3:', ''),
|
'format_id': 'http',
|
||||||
'vcodec': 'none',
|
'quality': 1,
|
||||||
}]
|
}
|
||||||
|
if not is_video:
|
||||||
|
http_format['vcodec'] = 'none'
|
||||||
|
formats.append(http_format)
|
||||||
|
|
||||||
download_urls = set()
|
download_urls = set()
|
||||||
for m in re.finditer(
|
for m in re.finditer(
|
||||||
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
||||||
format_id = m.group('id').lower()
|
format_id = m.group('id').lower()
|
||||||
if format_id == 'gif':
|
if format_id in ('gif', 'jpeg'):
|
||||||
continue
|
continue
|
||||||
download_url = m.group('url')
|
download_url = m.group('url')
|
||||||
if download_url in download_urls:
|
if download_url in download_urls:
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class LyndaBaseIE(InfoExtractor):
|
class LyndaBaseIE(InfoExtractor):
|
||||||
_SIGNIN_URL = 'https://www.lynda.com/signin'
|
_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
|
||||||
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||||
_USER_URL = 'https://www.lynda.com/signin/user'
|
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
|
@ -21,7 +21,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MediasiteIE(InfoExtractor):
|
class MediasiteIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
||||||
@ -84,7 +84,15 @@ class MediasiteIE(InfoExtractor):
|
|||||||
'timestamp': 1333983600,
|
'timestamp': 1333983600,
|
||||||
'duration': 7794,
|
'duration': 7794,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
||||||
|
@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor):
|
|||||||
stream_info = info_json['streamInfo']
|
stream_info = info_json['streamInfo']
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
def decrypt_url(f_url):
|
||||||
|
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
|
||||||
|
decrypted_url = self._decrypt_xor_cipher(k, f_url)
|
||||||
|
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
||||||
|
return decrypted_url
|
||||||
|
|
||||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||||
format_url = stream_info.get(url_key)
|
format_url = stream_info.get(url_key)
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
decrypted = decrypt_url(compat_b64decode(format_url))
|
||||||
if not decrypted:
|
if not decrypted:
|
||||||
continue
|
continue
|
||||||
if url_key == 'hlsUrl':
|
if url_key == 'hlsUrl':
|
||||||
|
@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE
|
|||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
@ -269,27 +267,14 @@ class CSNNEIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(ThePlatformIE):
|
class NBCNewsIE(ThePlatformIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
|
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
|
||||||
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
|
||||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
|
||||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '52753292',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Crew emerges after four-month Mars food study',
|
|
||||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||||
'md5': 'af1adfa51312291a017720403826bb64',
|
'md5': 'af1adfa51312291a017720403826bb64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p_tweet_snow_140529',
|
'id': '269389891880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||||
@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||||
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nn_netcast_150204',
|
'id': '394064451844',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||||
@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||||
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'x_lon_vwhorn_150922',
|
'id': '529953347624',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||||
@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||||
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tdy_al_space_160420',
|
'id': '669831235788',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'See the aurora borealis from space in stunning new NASA video',
|
'title': 'See the aurora borealis from space in stunning new NASA video',
|
||||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||||
@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'n_hayes_Aimm_140801_272214',
|
'id': '314487875924',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The chaotic GOP immigration vote',
|
'title': 'The chaotic GOP immigration vote',
|
||||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||||
@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
if not video_id.isdigit():
|
||||||
if video_id is not None:
|
|
||||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
|
||||||
info = all_info.find('video')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info.find('headline').text,
|
|
||||||
'ext': 'flv',
|
|
||||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
|
||||||
'description': info.find('caption').text,
|
|
||||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# "feature" and "nightly-news" pages use theplatform.com
|
|
||||||
video_id = mobj.group('mpx_id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
filter_param = 'byId'
|
data = self._parse_json(self._search_regex(
|
||||||
bootstrap_json = self._search_regex(
|
r'window\.__data\s*=\s*({.+});', webpage,
|
||||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
'bootstrap json'), video_id)
|
||||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
|
video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id']
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
|
|
||||||
webpage, 'bootstrap json', default=None)
|
|
||||||
if bootstrap_json:
|
|
||||||
bootstrap = self._parse_json(
|
|
||||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = None
|
return {
|
||||||
if 'results' in bootstrap:
|
'_type': 'url_transparent',
|
||||||
info = bootstrap['results'][0]['video']
|
'id': video_id,
|
||||||
elif 'video' in bootstrap:
|
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||||
info = bootstrap['video']
|
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}),
|
||||||
elif 'msnbcVideoInfo' in bootstrap:
|
'ie_key': 'ThePlatformFeed',
|
||||||
info = bootstrap['msnbcVideoInfo']['meta']
|
}
|
||||||
elif 'msnbcThePlatform' in bootstrap:
|
|
||||||
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
|
|
||||||
else:
|
|
||||||
info = bootstrap
|
|
||||||
|
|
||||||
if 'guid' in info:
|
|
||||||
video_id = info['guid']
|
|
||||||
filter_param = 'byGuid'
|
|
||||||
elif 'mpxId' in info:
|
|
||||||
video_id = info['mpxId']
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
|
||||||
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
|
|
||||||
'ie_key': 'ThePlatformFeed',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NBCOlympicsIE(InfoExtractor):
|
class NBCOlympicsIE(InfoExtractor):
|
||||||
|
@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
bitrates = self._parse_json(
|
bitrates = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||||
video_id, transform_source=js_to_json)
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||||
|
@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video
|
# video
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
|
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150533',
|
'id': '150533',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
'duration': 263,
|
'duration': 262,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
@ -248,7 +248,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||||
(?:serie/[^/]+|program)/
|
(?:serie(?:/[^/]+){1,2}|program)/
|
||||||
(?![Ee]pisodes)%s
|
(?![Ee]pisodes)%s
|
||||||
(?:/\d{2}-\d{2}-\d{4})?
|
(?:/\d{2}-\d{2}-\d{4})?
|
||||||
(?:\#del=(?P<part_id>\d+))?
|
(?:\#del=(?P<part_id>\d+))?
|
||||||
@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MUHH48000314AA',
|
'id': 'MUHH48000314AA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '20 spørsmål 23.05.2014',
|
'title': '20 spørsmål 23.05.2014',
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
'duration': 1741,
|
'duration': 1741,
|
||||||
'series': '20 spørsmål - TV',
|
'series': '20 spørsmål',
|
||||||
'episode': '23.05.2014',
|
'episode': '23.05.2014',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
'id': 'MSPO40010515AH',
|
'id': 'MSPO40010515AH',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
'duration': 772,
|
'duration': 772,
|
||||||
'series': 'Tour de Ski',
|
'series': 'Tour de Ski',
|
||||||
'episode': '06.01.2015',
|
'episode': '06.01.2015',
|
||||||
@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
'id': 'MSPO40010515BH',
|
'id': 'MSPO40010515BH',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
'duration': 6175,
|
'duration': 6175,
|
||||||
'series': 'Tour de Ski',
|
'series': 'Tour de Ski',
|
||||||
'episode': '06.01.2015',
|
'episode': '06.01.2015',
|
||||||
@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MSPO40010515',
|
'id': 'MSPO40010515',
|
||||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Video is geo restricted'],
|
'expected_warnings': ['Video is geo restricted'],
|
||||||
}, {
|
}, {
|
||||||
@ -362,6 +362,9 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
@ -403,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
|
|||||||
def _extract_series(self, webpage, display_id, fatal=True):
|
def _extract_series(self, webpage, display_id, fatal=True):
|
||||||
config = self._parse_json(
|
config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
|
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
|
||||||
default='{}' if not fatal else NO_DEFAULT),
|
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
||||||
|
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
||||||
display_id, fatal=False)
|
display_id, fatal=False)
|
||||||
if not config:
|
if not config:
|
||||||
return
|
return
|
||||||
return try_get(config, lambda x: x['series'], dict)
|
return try_get(
|
||||||
|
config,
|
||||||
|
(lambda x: x['initialState']['series'], lambda x: x['series']),
|
||||||
|
dict)
|
||||||
|
|
||||||
|
def _extract_seasons(self, seasons):
|
||||||
|
if not isinstance(seasons, list):
|
||||||
|
return []
|
||||||
|
entries = []
|
||||||
|
for season in seasons:
|
||||||
|
entries.extend(self._extract_episodes(season))
|
||||||
|
return entries
|
||||||
|
|
||||||
def _extract_episodes(self, season):
|
def _extract_episodes(self, season):
|
||||||
entries = []
|
|
||||||
if not isinstance(season, dict):
|
if not isinstance(season, dict):
|
||||||
return entries
|
return []
|
||||||
episodes = season.get('episodes')
|
return self._extract_entries(season.get('episodes'))
|
||||||
if not isinstance(episodes, list):
|
|
||||||
return entries
|
def _extract_entries(self, entry_list):
|
||||||
for episode in episodes:
|
if not isinstance(entry_list, list):
|
||||||
|
return []
|
||||||
|
entries = []
|
||||||
|
for episode in entry_list:
|
||||||
nrk_id = episode.get('prfId')
|
nrk_id = episode.get('prfId')
|
||||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||||
continue
|
continue
|
||||||
@ -462,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
|||||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# new layout
|
# new layout, seasons
|
||||||
'url': 'https://tv.nrk.no/serie/backstage',
|
'url': 'https://tv.nrk.no/serie/backstage',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'backstage',
|
'id': 'backstage',
|
||||||
@ -471,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 60,
|
'playlist_mincount': 60,
|
||||||
}, {
|
}, {
|
||||||
# old layout
|
# new layout, instalments
|
||||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'groenn-glede',
|
'id': 'groenn-glede',
|
||||||
'title': 'Grønn glede',
|
'title': 'Grønn glede',
|
||||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 10,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
# old layout
|
||||||
|
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'labyrint',
|
'id': 'labyrint',
|
||||||
'title': 'Labyrint',
|
'title': 'Labyrint',
|
||||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
@ -517,11 +535,12 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
|||||||
description = try_get(
|
description = try_get(
|
||||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||||
entries = []
|
entries = []
|
||||||
for season in series['seasons']:
|
entries.extend(self._extract_seasons(series.get('seasons')))
|
||||||
entries.extend(self._extract_episodes(season))
|
entries.extend(self._extract_entries(series.get('instalments')))
|
||||||
|
entries.extend(self._extract_episodes(series.get('extraMaterial')))
|
||||||
return self.playlist_result(entries, series_id, title, description)
|
return self.playlist_result(entries, series_id, title, description)
|
||||||
|
|
||||||
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
|
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||||
@ -533,6 +552,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
|||||||
'seriestitle', webpage,
|
'seriestitle', webpage,
|
||||||
'title', default=None) or self._og_search_title(
|
'title', default=None) or self._og_search_title(
|
||||||
webpage, fatal=False)
|
webpage, fatal=False)
|
||||||
|
if title:
|
||||||
|
title = self._search_regex(
|
||||||
|
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
|
||||||
|
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'series_description', webpage,
|
'series_description', webpage,
|
||||||
@ -593,7 +615,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
|
|||||||
'title': 'Rivertonprisen til Karin Fossum',
|
'title': 'Rivertonprisen til Karin Fossum',
|
||||||
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||||
},
|
},
|
||||||
'playlist_count': 5,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_title(self, webpage):
|
def _extract_title(self, webpage):
|
||||||
|
@ -11,20 +11,27 @@ from ..utils import (
|
|||||||
|
|
||||||
class NZZIE(InfoExtractor):
|
class NZZIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9153',
|
'id': '9153',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1368112',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
|
for player_element in re.findall(
|
||||||
|
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||||
player_params = extract_attributes(player_element)
|
player_params = extract_attributes(player_element)
|
||||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||||
self.report_warning('Unsupported player type')
|
self.report_warning('Unsupported player type')
|
||||||
|
@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
|
|||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?P<host>
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
openload\.(?:co|io|link)|
|
||||||
|
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
|
||||||
|
)
|
||||||
|
)/
|
||||||
|
(?:f|embed)/
|
||||||
|
(?P<id>[a-zA-Z0-9-_]+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@ -334,8 +345,11 @@ class OpenloadIE(InfoExtractor):
|
|||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': self._USER_AGENT,
|
'User-Agent': self._USER_AGENT,
|
||||||
}
|
}
|
||||||
@ -368,7 +382,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
||||||
'stream URL'))
|
'stream URL'))
|
||||||
|
|
||||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
@ -379,7 +393,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
entry = entries[0] if entries else {}
|
entry = entries[0] if entries else {}
|
||||||
subtitles = entry.get('subtitles')
|
subtitles = entry.get('subtitles')
|
||||||
|
|
||||||
info_dict = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
@ -388,4 +402,3 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
return info_dict
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -15,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class PicartoIE(InfoExtractor):
|
class PicartoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://picarto.tv/Setz',
|
'url': 'https://picarto.tv/Setz',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
|
|||||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
stream_page = self._download_webpage(url, channel_id)
|
channel_id = mobj.group('id')
|
||||||
|
|
||||||
if '>This channel does not exist' in stream_page:
|
metadata = self._download_json(
|
||||||
raise ExtractorError(
|
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||||
'Channel %s does not exist' % channel_id, expected=True)
|
channel_id)
|
||||||
|
|
||||||
player = self._parse_json(
|
if metadata.get('online') is False:
|
||||||
self._search_regex(
|
|
||||||
r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
|
|
||||||
'player settings'),
|
|
||||||
channel_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
if player.get('online') is False:
|
|
||||||
raise ExtractorError('Stream is offline', expected=True)
|
raise ExtractorError('Stream is offline', expected=True)
|
||||||
|
|
||||||
cdn_data = self._download_json(
|
cdn_data = self._download_json(
|
||||||
@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
|
|||||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||||
note='Downloading load balancing info')
|
note='Downloading load balancing info')
|
||||||
|
|
||||||
def get_event(key):
|
token = mobj.group('token') or 'public'
|
||||||
return try_get(player, lambda x: x['event'][key], compat_str) or ''
|
|
||||||
|
|
||||||
params = {
|
params = {
|
||||||
'token': player.get('token') or '',
|
|
||||||
'ticket': get_event('ticket'),
|
|
||||||
'con': int(time.time() * 1000),
|
'con': int(time.time() * 1000),
|
||||||
'type': get_event('ticket'),
|
'token': token,
|
||||||
'scope': get_event('scope'),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prefered_edge = cdn_data.get('preferedEdge')
|
prefered_edge = cdn_data.get('preferedEdge')
|
||||||
default_tech = player.get('defaultTech')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for edge in cdn_data['edges']:
|
for edge in cdn_data['edges']:
|
||||||
@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
|
|||||||
preference = 0
|
preference = 0
|
||||||
if edge_id == prefered_edge:
|
if edge_id == prefered_edge:
|
||||||
preference += 1
|
preference += 1
|
||||||
if tech_type == default_tech:
|
|
||||||
preference += 1
|
|
||||||
format_id = []
|
format_id = []
|
||||||
if edge_id:
|
if edge_id:
|
||||||
format_id.append(edge_id)
|
format_id.append(edge_id)
|
||||||
@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
mature = player.get('mature')
|
mature = metadata.get('adult')
|
||||||
if mature is None:
|
if mature is None:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
else:
|
else:
|
||||||
@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': channel_id,
|
'id': channel_id,
|
||||||
'title': self._live_title(channel_id),
|
'title': self._live_title(metadata.get('title') or channel_id),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'thumbnail': player.get('vodThumb'),
|
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||||
|
'channel': channel_id,
|
||||||
|
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
@ -121,12 +121,15 @@ class PornHubIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
|
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
@ -134,14 +137,16 @@ class PornHubIE(InfoExtractor):
|
|||||||
pattern, webpage, '%s count' % name, fatal=False))
|
pattern, webpage, '%s count' % name, fatal=False))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host') or 'pornhub.com'
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
self._set_cookie('pornhub.com', 'age_verified', '1')
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
|
|
||||||
def dl_webpage(platform):
|
def dl_webpage(platform):
|
||||||
self._set_cookie('pornhub.com', 'platform', platform)
|
self._set_cookie(host, 'platform', platform)
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
|
||||||
video_id, 'Downloading %s webpage' % platform)
|
video_id, 'Downloading %s webpage' % platform)
|
||||||
|
|
||||||
webpage = dl_webpage('pc')
|
webpage = dl_webpage('pc')
|
||||||
@ -303,7 +308,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
class PornHubPlaylistBaseIE(InfoExtractor):
|
||||||
def _extract_entries(self, webpage):
|
def _extract_entries(self, webpage, host):
|
||||||
# Only process container div with main playlist content skipping
|
# Only process container div with main playlist content skipping
|
||||||
# drop-down menu that uses similar pattern for videos (see
|
# drop-down menu that uses similar pattern for videos (see
|
||||||
# https://github.com/rg3/youtube-dl/issues/11594).
|
# https://github.com/rg3/youtube-dl/issues/11594).
|
||||||
@ -313,7 +318,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'http://www.pornhub.com/%s' % video_url,
|
'http://www.%s/%s' % (host, video_url),
|
||||||
PornHubIE.ie_key(), video_title=title)
|
PornHubIE.ie_key(), video_title=title)
|
||||||
for video_url, title in orderedSet(re.findall(
|
for video_url, title in orderedSet(re.findall(
|
||||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
||||||
@ -321,11 +326,13 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
entries = self._extract_entries(webpage)
|
entries = self._extract_entries(webpage, host)
|
||||||
|
|
||||||
playlist = self._parse_json(
|
playlist = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
@ -340,7 +347,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -355,7 +362,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -396,7 +403,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
user_id = mobj.group('id')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
@ -408,7 +417,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
break
|
break
|
||||||
raise
|
raise
|
||||||
page_entries = self._extract_entries(webpage)
|
page_entries = self._extract_entries(webpage, host)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
entries.extend(page_entries)
|
entries.extend(page_entries)
|
||||||
|
@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
|
|
||||||
try:
|
info_dict = {}
|
||||||
json_string = self._download_json(
|
|
||||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
|
||||||
item_id)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
|
||||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
|
||||||
if error_info:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
|
||||||
expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
# NB the string values in the JSON are stored using XML escaping(!)
|
|
||||||
show = json_string['shows'][0]
|
|
||||||
title = unescapeHTML(show['title'])
|
|
||||||
description = unescapeHTML(show.get('description'))
|
|
||||||
thumbnail = show.get('thumbnail')
|
|
||||||
duration = float_or_none(show.get('duration'), 1000)
|
|
||||||
timestamp = parse_iso8601(show.get('published'))
|
|
||||||
|
|
||||||
mg = show['media:group'][0]
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
if mg.get('url'):
|
ENDPOINTS = (
|
||||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
|
||||||
if m:
|
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
|
||||||
m = m.groupdict()
|
)
|
||||||
formats.append({
|
|
||||||
'url': m['url'] + '/' + m['app'],
|
|
||||||
'app': m['app'],
|
|
||||||
'play_path': m['playpath'],
|
|
||||||
'player_url': url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': 'rtmp',
|
|
||||||
})
|
|
||||||
|
|
||||||
if mg.get('hls_server') and mg.get('hls_url'):
|
for num, ep_url in enumerate(ENDPOINTS, start=1):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
try:
|
||||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
data = self._download_json(ep_url + item_id, item_id)
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
except ExtractorError as ee:
|
||||||
|
if num < len(ENDPOINTS) or formats:
|
||||||
|
continue
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||||
|
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||||
|
if error_info:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
if mg.get('hds_server') and mg.get('hds_url'):
|
# NB the string values in the JSON are stored using XML escaping(!)
|
||||||
formats.extend(self._extract_f4m_formats(
|
show = try_get(data, lambda x: x['shows'][0], dict)
|
||||||
mg['hds_server'] + mg['hds_url'], item_id,
|
if not show:
|
||||||
f4m_id='hds', fatal=False))
|
continue
|
||||||
|
|
||||||
|
if not info_dict:
|
||||||
|
title = unescapeHTML(show['title'])
|
||||||
|
description = unescapeHTML(show.get('description'))
|
||||||
|
thumbnail = show.get('thumbnail')
|
||||||
|
duration = float_or_none(show.get('duration'), 1000)
|
||||||
|
timestamp = parse_iso8601(show.get('published'))
|
||||||
|
info_dict = {
|
||||||
|
'id': item_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
mg = try_get(show, lambda x: x['media:group'][0], dict)
|
||||||
|
if not mg:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if mg.get('url'):
|
||||||
|
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||||
|
if m:
|
||||||
|
m = m.groupdict()
|
||||||
|
formats.append({
|
||||||
|
'url': m['url'] + '/' + m['app'],
|
||||||
|
'app': m['app'],
|
||||||
|
'play_path': m['playpath'],
|
||||||
|
'player_url': url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
})
|
||||||
|
|
||||||
|
if mg.get('hls_server') and mg.get('hls_url'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
if mg.get('hds_server') and mg.get('hds_url'):
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
mg['hds_server'] + mg['hds_url'], item_id,
|
||||||
|
f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
|
mg_rte_server = str_or_none(mg.get('rte:server'))
|
||||||
|
mg_url = str_or_none(mg.get('url'))
|
||||||
|
if mg_rte_server and mg_url:
|
||||||
|
hds_url = url_or_none(mg_rte_server + mg_url)
|
||||||
|
if hds_url:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
hds_url, item_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info_dict['formats'] = formats
|
||||||
'id': item_id,
|
return info_dict
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RteIE(RteBaseIE):
|
class RteIE(RteBaseIE):
|
||||||
|
@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_xml = self._download_xml(
|
video_xml = self._download_xml(
|
||||||
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||||
|
query={'id': video_id})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
processed_urls = []
|
processed_urls = []
|
||||||
|
@ -15,10 +15,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class SafariBaseIE(InfoExtractor):
|
class SafariBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
_LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
|
||||||
_NETRC_MACHINE = 'safari'
|
_NETRC_MACHINE = 'safari'
|
||||||
|
|
||||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
_API_BASE = 'https://learning.oreilly.com/api/v1'
|
||||||
_API_FORMAT = 'json'
|
_API_FORMAT = 'json'
|
||||||
|
|
||||||
LOGGED_IN = False
|
LOGGED_IN = False
|
||||||
@ -76,7 +76,7 @@ class SafariIE(SafariBaseIE):
|
|||||||
IE_DESC = 'safaribooksonline.com online video'
|
IE_DESC = 'safaribooksonline.com online video'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?safaribooksonline\.com/
|
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||||
(?:
|
(?:
|
||||||
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
||||||
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
||||||
@ -104,6 +104,9 @@ class SafariIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PARTNER_ID = '1926081'
|
_PARTNER_ID = '1926081'
|
||||||
@ -160,7 +163,7 @@ class SafariIE(SafariBaseIE):
|
|||||||
|
|
||||||
class SafariApiIE(SafariBaseIE):
|
class SafariApiIE(SafariBaseIE):
|
||||||
IE_NAME = 'safari:api'
|
IE_NAME = 'safari:api'
|
||||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||||
@ -185,7 +188,7 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?safaribooksonline\.com/
|
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||||
(?:
|
(?:
|
||||||
library/view/[^/]+|
|
library/view/[^/]+|
|
||||||
api/v1/book|
|
api/v1/book|
|
||||||
@ -213,6 +216,9 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -19,7 +19,7 @@ class ScrippsNetworksWatchIE(AWSIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
watch\.
|
watch\.
|
||||||
(?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
|
(?P<site>geniuskitchen)\.com/
|
||||||
(?:
|
(?:
|
||||||
player\.[A-Z0-9]+\.html\#|
|
player\.[A-Z0-9]+\.html\#|
|
||||||
show/(?:[^/]+/){2}|
|
show/(?:[^/]+/){2}|
|
||||||
@ -28,38 +28,23 @@ class ScrippsNetworksWatchIE(AWSIE):
|
|||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
|
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
||||||
'md5': '26545fd676d939954c6808274bdb905a',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4173834',
|
'id': '4194875',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Best Ever Treehouses',
|
'title': 'Ample Hills Ice Cream Bike',
|
||||||
'description': "We're searching for the most over the top treehouses.",
|
'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
|
||||||
'uploader': 'ANV',
|
'uploader': 'ANV',
|
||||||
'upload_date': '20170922',
|
'upload_date': '20171011',
|
||||||
'timestamp': 1506056400,
|
'timestamp': 1507698000,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [AnvatoIE.ie_key()],
|
'add_ie': [AnvatoIE.ie_key()],
|
||||||
}, {
|
|
||||||
'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_SNI_TABLE = {
|
_SNI_TABLE = {
|
||||||
'hgtv': 'hgtv',
|
|
||||||
'diynetwork': 'diy',
|
|
||||||
'foodnetwork': 'food',
|
|
||||||
'cookingchanneltv': 'cook',
|
|
||||||
'travelchannel': 'trav',
|
|
||||||
'geniuskitchen': 'genius',
|
'geniuskitchen': 'genius',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ from ..compat import compat_b64decode
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_video_url(self, webpage, video_id, *args):
|
def _extract_video_url(self, webpage, video_id, *args):
|
||||||
|
def decode_url(encoded_url):
|
||||||
|
return compat_b64decode(encoded_url).decode('utf-8')
|
||||||
|
|
||||||
|
stream_url = url_or_none(decode_url(self._search_regex(
|
||||||
|
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'stream url', default=None, group='url')))
|
||||||
|
if stream_url:
|
||||||
|
return stream_url
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
webpage, 'stream', group='url'),
|
webpage, 'stream', group='url'),
|
||||||
video_id,
|
video_id, transform_source=decode_url)[0]
|
||||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
|
||||||
|
@ -64,7 +64,7 @@ class SixPlayIE(InfoExtractor):
|
|||||||
for asset in clip_data['assets']:
|
for asset in clip_data['assets']:
|
||||||
asset_url = asset.get('full_physical_path')
|
asset_url = asset.get('full_physical_path')
|
||||||
protocol = asset.get('protocol')
|
protocol = asset.get('protocol')
|
||||||
if not asset_url or protocol == 'primetime' or asset_url in urls:
|
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.append(asset_url)
|
urls.append(asset_url)
|
||||||
container = asset.get('video_container')
|
container = asset.get('video_container')
|
||||||
@ -81,19 +81,17 @@ class SixPlayIE(InfoExtractor):
|
|||||||
if not urlh:
|
if not urlh:
|
||||||
continue
|
continue
|
||||||
asset_url = urlh.geturl()
|
asset_url = urlh.geturl()
|
||||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
for i in range(3, 0, -1):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
|
||||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_id='hls', fatal=False))
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
formats.extend(self._extract_f4m_formats(
|
m3u8_id='hls', fatal=False)
|
||||||
asset_url.replace('.m3u8', '.f4m'),
|
formats.extend(m3u8_formats)
|
||||||
video_id, f4m_id='hds', fatal=False))
|
formats.extend(self._extract_mpd_formats(
|
||||||
formats.extend(self._extract_mpd_formats(
|
asset_url.replace('.m3u8', '.mpd'),
|
||||||
asset_url.replace('.m3u8', '.mpd'),
|
video_id, mpd_id='dash', fatal=False))
|
||||||
video_id, mpd_id='dash', fatal=False))
|
if m3u8_formats:
|
||||||
formats.extend(self._extract_ism_formats(
|
break
|
||||||
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
|
||||||
video_id, ism_id='mss', fatal=False))
|
|
||||||
else:
|
else:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class TBSIE(TurnerBaseIE):
|
class TBSIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
|
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -40,12 +40,12 @@ class TBSIE(TurnerBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
drupal_settings = self._parse_json(self._search_regex(
|
drupal_settings = self._parse_json(self._search_regex(
|
||||||
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
||||||
webpage, 'drupal setting'), display_id)
|
webpage, 'drupal setting'), display_id)
|
||||||
video_data = drupal_settings['turner_playlist'][0]
|
video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
|
||||||
|
|
||||||
media_id = video_data['mediaID']
|
media_id = video_data['mediaID']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
@ -14,20 +14,38 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class UpskillBaseIE(InfoExtractor):
|
class TeachableBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'http://upskillcourses.com/sign_in'
|
_NETRC_MACHINE = 'teachable'
|
||||||
_NETRC_MACHINE = 'upskill'
|
_URL_PREFIX = 'teachable:'
|
||||||
|
|
||||||
|
_SITES = {
|
||||||
|
# Only notable ones here
|
||||||
|
'upskillcourses.com': 'upskill',
|
||||||
|
'academy.gns3.com': 'gns3',
|
||||||
|
'academyhacker.com': 'academyhacker',
|
||||||
|
'stackskills.com': 'stackskills',
|
||||||
|
'market.saleshacker.com': 'saleshacker',
|
||||||
|
'learnability.org': 'learnability',
|
||||||
|
'edurila.com': 'edurila',
|
||||||
|
}
|
||||||
|
|
||||||
|
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._logged_in = False
|
||||||
|
|
||||||
def _login(self):
|
def _login(self, site):
|
||||||
username, password = self._get_login_info()
|
if self._logged_in:
|
||||||
|
return
|
||||||
|
|
||||||
|
username, password = self._get_login_info(
|
||||||
|
netrc_machine=self._SITES.get(site, site))
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_page, urlh = self._download_webpage_handle(
|
login_page, urlh = self._download_webpage_handle(
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
'https://%s/sign_in' % site, None,
|
||||||
|
'Downloading %s login page' % site)
|
||||||
|
|
||||||
login_url = compat_str(urlh.geturl())
|
login_url = compat_str(urlh.geturl())
|
||||||
|
|
||||||
@ -46,18 +64,24 @@ class UpskillBaseIE(InfoExtractor):
|
|||||||
post_url = urljoin(login_url, post_url)
|
post_url = urljoin(login_url, post_url)
|
||||||
|
|
||||||
response = self._download_webpage(
|
response = self._download_webpage(
|
||||||
post_url, None, 'Logging in',
|
post_url, None, 'Logging in to %s' % site,
|
||||||
data=urlencode_postdata(login_form),
|
data=urlencode_postdata(login_form),
|
||||||
headers={
|
headers={
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
'Referer': login_url,
|
'Referer': login_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if '>I accept the new Privacy Policy<' in response:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s asks you to accept new Privacy Policy. '
|
||||||
|
'Go to https://%s/ and accept.' % (site, site), expected=True)
|
||||||
|
|
||||||
# Successful login
|
# Successful login
|
||||||
if any(re.search(p, response) for p in (
|
if any(re.search(p, response) for p in (
|
||||||
r'class=["\']user-signout',
|
r'class=["\']user-signout',
|
||||||
r'<a[^>]+\bhref=["\']/sign_out',
|
r'<a[^>]+\bhref=["\']/sign_out',
|
||||||
r'>\s*Log out\s*<')):
|
r'>\s*Log out\s*<')):
|
||||||
|
self._logged_in = True
|
||||||
return
|
return
|
||||||
|
|
||||||
message = get_element_by_class('alert', response)
|
message = get_element_by_class('alert', response)
|
||||||
@ -68,8 +92,14 @@ class UpskillBaseIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
class UpskillIE(UpskillBaseIE):
|
class TeachableIE(TeachableBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
%shttps?://(?P<site_t>[^/]+)|
|
||||||
|
https?://(?:www\.)?(?P<site>%s)
|
||||||
|
)
|
||||||
|
/courses/[^/]+/lectures/(?P<id>\d+)
|
||||||
|
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
|
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
|
||||||
@ -77,7 +107,7 @@ class UpskillIE(UpskillBaseIE):
|
|||||||
'id': 'uzw6zw58or',
|
'id': 'uzw6zw58or',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Welcome to the Course!',
|
'title': 'Welcome to the Course!',
|
||||||
'description': 'md5:8d66c13403783370af62ca97a7357bdd',
|
'description': 'md5:65edb0affa582974de4625b9cdea1107',
|
||||||
'duration': 138.763,
|
'duration': 138.763,
|
||||||
'timestamp': 1479846621,
|
'timestamp': 1479846621,
|
||||||
'upload_date': '20161122',
|
'upload_date': '20161122',
|
||||||
@ -88,10 +118,37 @@ class UpskillIE(UpskillBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
|
'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_teachable(webpage):
|
||||||
|
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||||
|
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage, source_url):
|
||||||
|
if not TeachableIE._is_teachable(webpage):
|
||||||
|
return
|
||||||
|
if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
|
||||||
|
return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
site = mobj.group('site') or mobj.group('site_t')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
self._login(site)
|
||||||
|
|
||||||
|
prefixed = url.startswith(self._URL_PREFIX)
|
||||||
|
if prefixed:
|
||||||
|
url = url[len(self._URL_PREFIX):]
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -113,12 +170,18 @@ class UpskillIE(UpskillBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class UpskillCourseIE(UpskillBaseIE):
|
class TeachableCourseIE(TeachableBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
%shttps?://(?P<site_t>[^/]+)|
|
||||||
|
https?://(?:www\.)?(?P<site>%s)
|
||||||
|
)
|
||||||
|
/(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
|
||||||
|
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
|
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '119763',
|
'id': 'essential-web-developer-course',
|
||||||
'title': 'The Essential Web Developer Course (Free)',
|
'title': 'The Essential Web Developer Course (Free)',
|
||||||
},
|
},
|
||||||
'playlist_count': 192,
|
'playlist_count': 192,
|
||||||
@ -128,21 +191,37 @@ class UpskillCourseIE(UpskillBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://upskillcourses.com/courses/enrolled/119763',
|
'url': 'http://upskillcourses.com/courses/enrolled/119763',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://academy.gns3.com/courses/enrolled/423415',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if UpskillIE.suitable(url) else super(
|
return False if TeachableIE.suitable(url) else super(
|
||||||
UpskillCourseIE, cls).suitable(url)
|
TeachableCourseIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
course_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
site = mobj.group('site') or mobj.group('site_t')
|
||||||
|
course_id = mobj.group('id')
|
||||||
|
|
||||||
|
self._login(site)
|
||||||
|
|
||||||
|
prefixed = url.startswith(self._URL_PREFIX)
|
||||||
|
if prefixed:
|
||||||
|
prefix = self._URL_PREFIX
|
||||||
|
url = url[len(prefix):]
|
||||||
|
|
||||||
webpage = self._download_webpage(url, course_id)
|
webpage = self._download_webpage(url, course_id)
|
||||||
|
|
||||||
course_id = self._search_regex(
|
url_base = 'https://%s/' % site
|
||||||
r'data-course-id=["\'](\d+)', webpage, 'course id',
|
|
||||||
default=course_id)
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
@ -162,10 +241,13 @@ class UpskillCourseIE(UpskillBaseIE):
|
|||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
|
r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
|
||||||
'title', default=None)
|
'title', default=None)
|
||||||
|
entry_url = urljoin(url_base, lecture_url)
|
||||||
|
if prefixed:
|
||||||
|
entry_url = self._URL_PREFIX + entry_url
|
||||||
entries.append(
|
entries.append(
|
||||||
self.url_result(
|
self.url_result(
|
||||||
urljoin('http://upskillcourses.com/', lecture_url),
|
entry_url,
|
||||||
ie=UpskillIE.ie_key(), video_id=lecture_id,
|
ie=TeachableIE.ie_key(), video_id=lecture_id,
|
||||||
video_title=clean_html(title)))
|
video_title=clean_html(title)))
|
||||||
|
|
||||||
course_title = self._html_search_regex(
|
course_title = self._html_search_regex(
|
@ -343,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||||
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -356,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
if first_video_id is None:
|
if first_video_id is None:
|
||||||
first_video_id = cur_video_id
|
first_video_id = cur_video_id
|
||||||
duration = float_or_none(item.get('plfile$duration'))
|
duration = float_or_none(item.get('plfile$duration'))
|
||||||
for asset_type in item['plfile$assetTypes']:
|
file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
|
||||||
|
for asset_type in file_asset_types:
|
||||||
if asset_type in asset_types:
|
if asset_type in asset_types:
|
||||||
continue
|
continue
|
||||||
asset_types.append(asset_type)
|
asset_types.append(asset_type)
|
||||||
|
117
youtube_dl/extractor/tiktok.py
Normal file
117
youtube_dl/extractor/tiktok.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokBaseIE(InfoExtractor):
|
||||||
|
def _extract_aweme(self, data):
|
||||||
|
video = data['video']
|
||||||
|
description = str_or_none(try_get(data, lambda x: x['desc']))
|
||||||
|
width = int_or_none(try_get(data, lambda x: video['width']))
|
||||||
|
height = int_or_none(try_get(data, lambda x: video['height']))
|
||||||
|
|
||||||
|
format_urls = set()
|
||||||
|
formats = []
|
||||||
|
for format_id in (
|
||||||
|
'play_addr_lowbr', 'play_addr', 'play_addr_h264',
|
||||||
|
'download_addr'):
|
||||||
|
for format in try_get(
|
||||||
|
video, lambda x: x[format_id]['url_list'], list) or []:
|
||||||
|
format_url = url_or_none(format)
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
if format_url in format_urls:
|
||||||
|
continue
|
||||||
|
format_urls.add(format_url)
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'height': height,
|
||||||
|
'width': width,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = url_or_none(try_get(
|
||||||
|
video, lambda x: x['cover']['url_list'][0], compat_str))
|
||||||
|
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
|
||||||
|
timestamp = int_or_none(data.get('create_time'))
|
||||||
|
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
|
||||||
|
try_get(data, lambda x: x['statistics']['comment_count']))
|
||||||
|
repost_count = int_or_none(try_get(
|
||||||
|
data, lambda x: x['statistics']['share_count']))
|
||||||
|
|
||||||
|
aweme_id = data['aweme_id']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': aweme_id,
|
||||||
|
'title': uploader or aweme_id,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'repost_count': repost_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokIE(TikTokBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
|
||||||
|
'md5': 'd584b572e92fcd48888051f238022420',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6606727368545406213',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zureeal',
|
||||||
|
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
|
||||||
|
'thumbnail': r're:^https?://.*~noop.image',
|
||||||
|
'uploader': 'Zureeal',
|
||||||
|
'timestamp': 1538248586,
|
||||||
|
'upload_date': '20180929',
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
|
||||||
|
return self._extract_aweme(data)
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokUserIE(TikTokBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '188294915489964032',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 24,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
user_id = self._match_id(url)
|
||||||
|
data = self._download_json(
|
||||||
|
'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
|
||||||
|
query={'_signature': '_'})
|
||||||
|
entries = []
|
||||||
|
for aweme in data['aweme_list']:
|
||||||
|
try:
|
||||||
|
entry = self._extract_aweme(aweme)
|
||||||
|
except ExtractorError:
|
||||||
|
continue
|
||||||
|
entry['extractor_key'] = TikTokIE.ie_key()
|
||||||
|
entries.append(entry)
|
||||||
|
return self.playlist_result(entries, user_id)
|
@ -18,8 +18,9 @@ from ..utils import (
|
|||||||
class TNAFlixNetworkBaseIE(InfoExtractor):
|
class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||||
# May be overridden in descendants if necessary
|
# May be overridden in descendants if necessary
|
||||||
_CONFIG_REGEX = [
|
_CONFIG_REGEX = [
|
||||||
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
|
r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
|
||||||
r'<input[^>]+name="config\d?" value="([^"]+)"',
|
r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
|
||||||
|
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
|
||||||
]
|
]
|
||||||
_HOST = 'tna'
|
_HOST = 'tna'
|
||||||
_VKEY_SUFFIX = ''
|
_VKEY_SUFFIX = ''
|
||||||
@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
|
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
|
||||||
|
group='url'), 'http:')
|
||||||
|
|
||||||
if not cfg_url:
|
if not cfg_url:
|
||||||
inputs = self._hidden_inputs(webpage)
|
inputs = self._hidden_inputs(webpage)
|
||||||
|
@ -171,7 +171,8 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
urls.append('https://twitter.com/i/videos/' + video_id)
|
urls.append('https://twitter.com/i/videos/' + video_id)
|
||||||
|
|
||||||
for u in urls:
|
for u in urls:
|
||||||
webpage = self._download_webpage(u, video_id)
|
webpage = self._download_webpage(
|
||||||
|
u, video_id, headers={'Referer': 'https://twitter.com/'})
|
||||||
|
|
||||||
iframe_url = self._html_search_regex(
|
iframe_url = self._html_search_regex(
|
||||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||||
|
@ -61,7 +61,7 @@ class UOLIE(InfoExtractor):
|
|||||||
'height': 360,
|
'height': 360,
|
||||||
},
|
},
|
||||||
'5': {
|
'5': {
|
||||||
'width': 1080,
|
'width': 1280,
|
||||||
'height': 720,
|
'height': 720,
|
||||||
},
|
},
|
||||||
'6': {
|
'6': {
|
||||||
@ -80,6 +80,10 @@ class UOLIE(InfoExtractor):
|
|||||||
'width': 568,
|
'width': 568,
|
||||||
'height': 320,
|
'height': 320,
|
||||||
},
|
},
|
||||||
|
'11': {
|
||||||
|
'width': 640,
|
||||||
|
'height': 360,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -111,19 +115,31 @@ class UOLIE(InfoExtractor):
|
|||||||
'ver': video_data.get('numRevision', 2),
|
'ver': video_data.get('numRevision', 2),
|
||||||
'r': 'http://mais.uol.com.br',
|
'r': 'http://mais.uol.com.br',
|
||||||
}
|
}
|
||||||
|
for k in ('token', 'sign'):
|
||||||
|
v = video_data.get(k)
|
||||||
|
if v:
|
||||||
|
query[k] = v
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data.get('formats', []):
|
for f in video_data.get('formats', []):
|
||||||
f_url = f.get('url') or f.get('secureUrl')
|
f_url = f.get('url') or f.get('secureUrl')
|
||||||
if not f_url:
|
if not f_url:
|
||||||
continue
|
continue
|
||||||
|
f_url = update_url_query(f_url, query)
|
||||||
format_id = str_or_none(f.get('id'))
|
format_id = str_or_none(f.get('id'))
|
||||||
|
if format_id == '10':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
f_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
continue
|
||||||
fmt = {
|
fmt = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': update_url_query(f_url, query),
|
'url': f_url,
|
||||||
|
'source_preference': 1,
|
||||||
}
|
}
|
||||||
fmt.update(self._FORMATS.get(format_id, {}))
|
fmt.update(self._FORMATS.get(format_id, {}))
|
||||||
formats.append(fmt)
|
formats.append(fmt)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
for tag in video_data.get('tags', []):
|
for tag in video_data.get('tags', []):
|
||||||
|
@ -14,10 +14,13 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
parse_filesize,
|
||||||
|
qualities,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
@ -27,7 +30,6 @@ from ..utils import (
|
|||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
parse_filesize,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -1063,3 +1065,96 @@ class VimeoLikesIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'entries': pl,
|
'entries': pl,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VHXEmbedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'vhx:embed'
|
||||||
|
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||||
|
|
||||||
|
def _call_api(self, video_id, access_token, path='', query=None):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={
|
||||||
|
'Authorization': 'Bearer ' + access_token,
|
||||||
|
}, query=query)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
credentials = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)credentials\s*:\s*({.+?}),', webpage,
|
||||||
|
'config'), video_id, js_to_json)
|
||||||
|
access_token = credentials['access_token']
|
||||||
|
|
||||||
|
query = {}
|
||||||
|
for k, v in credentials.items():
|
||||||
|
if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
|
||||||
|
if k == 'authUserToken':
|
||||||
|
query['auth_user_token'] = v
|
||||||
|
else:
|
||||||
|
query[k] = v
|
||||||
|
files = self._call_api(video_id, access_token, '/files', query)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for f in files:
|
||||||
|
href = try_get(f, lambda x: x['_links']['source']['href'])
|
||||||
|
if not href:
|
||||||
|
continue
|
||||||
|
method = f.get('method')
|
||||||
|
if method == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
href, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif method == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
href, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
fmt = {
|
||||||
|
'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
|
||||||
|
'format_id': 'http',
|
||||||
|
'preference': 1,
|
||||||
|
'url': href,
|
||||||
|
'vcodec': f.get('codec'),
|
||||||
|
}
|
||||||
|
quality = f.get('quality')
|
||||||
|
if quality:
|
||||||
|
fmt.update({
|
||||||
|
'format_id': 'http-' + quality,
|
||||||
|
'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_data = self._call_api(video_id, access_token)
|
||||||
|
title = video_data.get('title') or video_data['name']
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
|
||||||
|
lang = subtitle.get('srclang') or subtitle.get('label')
|
||||||
|
for _link in subtitle.get('_links', {}).values():
|
||||||
|
href = _link.get('href')
|
||||||
|
if not href:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': href,
|
||||||
|
})
|
||||||
|
|
||||||
|
q = qualities(['small', 'medium', 'large', 'source'])
|
||||||
|
thumbnails = []
|
||||||
|
for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items():
|
||||||
|
thumbnails.append({
|
||||||
|
'id': thumbnail_id,
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'preference': q(thumbnail_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': unified_timestamp(video_data.get('created_at')),
|
||||||
|
'view_count': int_or_none(video_data.get('plays_count')),
|
||||||
|
}
|
||||||
|
@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
|
|||||||
# This video is no longer available, because its author has been blocked.
|
# This video is no longer available, because its author has been blocked.
|
||||||
'url': 'https://vk.com/video-10639516_456240611',
|
'url': 'https://vk.com/video-10639516_456240611',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
},
|
||||||
]
|
{
|
||||||
|
# The video is not available in your region.
|
||||||
|
'url': 'https://vk.com/video-51812607_171445436',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
|
|||||||
|
|
||||||
r'<!>This video is no longer available, because it has been deleted.':
|
r'<!>This video is no longer available, because it has been deleted.':
|
||||||
'Video %s is no longer available, because it has been deleted.',
|
'Video %s is no longer available, because it has been deleted.',
|
||||||
|
|
||||||
|
r'<!>The video .+? is not available in your region.':
|
||||||
|
'Video %s is not available in your region.',
|
||||||
}
|
}
|
||||||
|
|
||||||
for error_re, error_msg in ERRORS.items():
|
for error_re, error_msg in ERRORS.items():
|
||||||
|
@ -120,8 +120,10 @@ class VRVIE(VRVBaseIE):
|
|||||||
url, video_id,
|
url, video_id,
|
||||||
headers=self.geo_verification_headers())
|
headers=self.geo_verification_headers())
|
||||||
media_resource = self._parse_json(self._search_regex(
|
media_resource = self._parse_json(self._search_regex(
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
|
[
|
||||||
webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
|
r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)',
|
||||||
|
r'window\.__INITIAL_STATE__\s*=\s*({.+})'
|
||||||
|
], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
|
||||||
|
|
||||||
video_data = media_resource.get('json')
|
video_data = media_resource.get('json')
|
||||||
if not video_data:
|
if not video_data:
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WistiaIE(InfoExtractor):
|
class WistiaIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)'
|
||||||
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
||||||
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
||||||
|
|
||||||
@ -35,12 +35,15 @@ class WistiaIE(InfoExtractor):
|
|||||||
# with hls video
|
# with hls video
|
||||||
'url': 'wistia:807fafadvk',
|
'url': 'wistia:807fafadvk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
|
||||||
if match:
|
if match:
|
||||||
return unescapeHTML(match.group('url'))
|
return unescapeHTML(match.group('url'))
|
||||||
|
|
||||||
|
140
youtube_dl/extractor/wwe.py
Normal file
140
youtube_dl/extractor/wwe.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WWEBaseIE(InfoExtractor):
|
||||||
|
_SUBTITLE_LANGS = {
|
||||||
|
'English': 'en',
|
||||||
|
'Deutsch': 'de',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_entry(self, data, url, video_id=None):
|
||||||
|
video_id = compat_str(video_id or data['nid'])
|
||||||
|
title = data['title']
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
|
||||||
|
description = data.get('description')
|
||||||
|
thumbnail = urljoin(url, data.get('image'))
|
||||||
|
series = data.get('show_name')
|
||||||
|
episode = data.get('episode_name')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = data.get('tracks')
|
||||||
|
if isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
if track.get('kind') != 'captions':
|
||||||
|
continue
|
||||||
|
track_file = url_or_none(track.get('file'))
|
||||||
|
if not track_file:
|
||||||
|
continue
|
||||||
|
label = track.get('label')
|
||||||
|
lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': track_file,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'series': series,
|
||||||
|
'episode': episode,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class WWEIE(WWEBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
|
||||||
|
'md5': '92811c6a14bfc206f7a6a9c5d9140184',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '40048199',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
|
||||||
|
'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
landing = self._parse_json(
|
||||||
|
self._html_search_regex(
|
||||||
|
r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
|
||||||
|
webpage, 'drupal settings'),
|
||||||
|
display_id)['WWEVideoLanding']
|
||||||
|
|
||||||
|
data = landing['initialVideo']['playlist'][0]
|
||||||
|
video_id = landing.get('initialVideoId')
|
||||||
|
|
||||||
|
info = self._extract_entry(data, url, video_id)
|
||||||
|
info['display_id'] = display_id
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class WWEPlaylistIE(WWEBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.wwe.com/shows/raw/2018-11-12',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2018-11-12',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
|
||||||
|
video = self._parse_json(
|
||||||
|
mobj.group('data'), display_id, transform_source=unescapeHTML,
|
||||||
|
fatal=False)
|
||||||
|
if not video:
|
||||||
|
continue
|
||||||
|
data = try_get(video, lambda x: x['playlist'][0], dict)
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
entry = self._extract_entry(data, url)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
entry['extractor_key'] = WWEIE.ie_key()
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, display_id)
|
@ -45,7 +45,7 @@ class XVideosIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.xvideos.com/video%s/' % video_id, video_id)
|
'https://www.xvideos.com/video%s/' % video_id, video_id)
|
||||||
|
|
||||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -26,7 +26,7 @@ class YourPornIE(InfoExtractor):
|
|||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
||||||
group='data'),
|
group='data'),
|
||||||
video_id)[video_id])
|
video_id)[video_id]).replace('/cdn/', '/cdn2/')
|
||||||
|
|
||||||
title = (self._search_regex(
|
title = (self._search_regex(
|
||||||
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
||||||
|
@ -48,6 +48,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1386,8 +1387,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _mark_watched(self, video_id, video_info):
|
def _mark_watched(self, video_id, video_info, player_response):
|
||||||
playback_url = video_info.get('videostats_playback_base_url', [None])[0]
|
playback_url = url_or_none(try_get(
|
||||||
|
player_response,
|
||||||
|
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
|
||||||
|
video_info, lambda x: x['videostats_playback_base_url'][0]))
|
||||||
if not playback_url:
|
if not playback_url:
|
||||||
return
|
return
|
||||||
parsed_playback_url = compat_urlparse.urlparse(playback_url)
|
parsed_playback_url = compat_urlparse.urlparse(playback_url)
|
||||||
@ -1712,30 +1716,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
video_description = ''
|
video_description = ''
|
||||||
|
|
||||||
if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
|
if not smuggled_data.get('force_singlefeed', False):
|
||||||
if not self._downloader.params.get('noplaylist'):
|
if not self._downloader.params.get('noplaylist'):
|
||||||
entries = []
|
multifeed_metadata_list = try_get(
|
||||||
feed_ids = []
|
player_response,
|
||||||
multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
|
lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
|
||||||
for feed in multifeed_metadata_list.split(','):
|
compat_str) or try_get(
|
||||||
# Unquote should take place before split on comma (,) since textual
|
video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
|
||||||
# fields may contain comma as well (see
|
if multifeed_metadata_list:
|
||||||
# https://github.com/rg3/youtube-dl/issues/8536)
|
entries = []
|
||||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
feed_ids = []
|
||||||
entries.append({
|
for feed in multifeed_metadata_list.split(','):
|
||||||
'_type': 'url_transparent',
|
# Unquote should take place before split on comma (,) since textual
|
||||||
'ie_key': 'Youtube',
|
# fields may contain comma as well (see
|
||||||
'url': smuggle_url(
|
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||||
{'force_singlefeed': True}),
|
entries.append({
|
||||||
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
'_type': 'url_transparent',
|
||||||
})
|
'ie_key': 'Youtube',
|
||||||
feed_ids.append(feed_data['id'][0])
|
'url': smuggle_url(
|
||||||
self.to_screen(
|
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
{'force_singlefeed': True}),
|
||||||
% (', '.join(feed_ids), video_id))
|
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||||
return self.playlist_result(entries, video_id, video_title, video_description)
|
})
|
||||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
feed_ids.append(feed_data['id'][0])
|
||||||
|
self.to_screen(
|
||||||
|
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||||
|
% (', '.join(feed_ids), video_id))
|
||||||
|
return self.playlist_result(entries, video_id, video_title, video_description)
|
||||||
|
else:
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
if view_count is None:
|
if view_count is None:
|
||||||
view_count = extract_view_count(video_info)
|
view_count = extract_view_count(video_info)
|
||||||
@ -2116,7 +2126,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
self.mark_watched(video_id, video_info)
|
self.mark_watched(video_id, video_info, player_response)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
57
youtube_dl/extractor/zype.py
Normal file
57
youtube_dl/extractor/zype.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ZypeIE(InfoExtractor):
    """Extractor for videos exposed through the Zype JavaScript embed player.

    Handles ``player.zype.com/embed/<hex id>.js?...api_key=...`` URLs. The
    embed script is downloaded and the title, HLS manifest URL and poster
    image are pulled out of it with regular expressions.
    """

    _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
    _TEST = {
        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
        'md5': 'eaee31d474c76a955bdaba02a505c595',
        'info_dict': {
            'id': '5b400b834b32992a310622b9',
            'ext': 'mp4',
            'title': 'Smoky Barbecue Favorites',
            'thumbnail': r're:^https?://.*\.jpe?g',
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the Zype embed script URLs referenced by *webpage*.

        Every ``<script src=...>`` pointing at a Zype embed script that
        carries an ``api_key`` parameter is collected, in document order.
        Protocol-relative (``//player.zype.com/...``) sources are matched
        as well and returned as found.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the Zype embed at *url*.

        The title and the m3u8 URL are looked up without a default; the
        thumbnail is optional.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
            'title', group='value')

        # Any quoted string containing ".m3u8" is taken as the HLS manifest.
        m3u8_url = self._search_regex(
            r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
            'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        # The poster is optional. Fall back to None (not False) so that a
        # missing thumbnail is reported as "absent" rather than as a boolean
        # value in a field that is expected to hold a URL.
        thumbnail = self._search_regex(
            r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
            default=None, group='url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@ -39,6 +39,7 @@ from .compat import (
|
|||||||
compat_HTMLParser,
|
compat_HTMLParser,
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
|
compat_cookiejar,
|
||||||
compat_ctypes_WINFUNCTYPE,
|
compat_ctypes_WINFUNCTYPE,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
@ -1139,6 +1140,33 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
|||||||
req, **kwargs)
|
req, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that round-trips session cookies as ``expires=0``.

    MozillaCookieJar writes session cookies with an empty ``expires`` field
    and only recognizes that form when reading. This subclass writes ``0``
    instead and, when loading, maps ``0`` back to a session cookie.
    """

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save the jar, storing session cookies with ``expires`` set to 0.

        The arguments are passed through unchanged to
        ``MozillaCookieJar.save``. The in-memory cookies are left exactly as
        they were before the call.
        """
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        session_cookies = [
            cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0
        try:
            compat_cookiejar.MozillaCookieJar.save(
                self, filename, ignore_discard, ignore_expires)
        finally:
            # Undo the temporary marker: a cookie whose `expires` is 0 counts
            # as expired, so leaving it in place would make the live jar stop
            # sending its session cookies after a save.
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from file, treating ``expires=0`` as session cookies.

        The arguments are passed through unchanged to
        ``MozillaCookieJar.load``.
        """
        compat_cookiejar.MozillaCookieJar.load(
            self, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
|
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
|
||||||
def __init__(self, cookiejar=None):
|
def __init__(self, cookiejar=None):
|
||||||
compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
|
compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
|
||||||
@ -3948,8 +3976,12 @@ def write_xattr(path, key, value):
|
|||||||
|
|
||||||
|
|
||||||
def random_birthday(year_field, month_field, day_field):
    """Return a random, valid birth date as a dict of form-field strings.

    A calendar date is drawn uniformly between 1950-01-01 and 1995-12-31
    (both inclusive), so impossible combinations such as 31 February cannot
    occur. The three arguments are the keys to use for the year, month and
    day in the returned dict; the values are the decimal strings of the
    respective date components.
    """
    start_date = datetime.date(1950, 1, 1)
    end_date = datetime.date(1995, 12, 31)
    # randint is inclusive on both ends, so end_date itself can be drawn.
    offset = random.randint(0, (end_date - start_date).days)
    random_date = start_date + datetime.timedelta(offset)
    return {
        year_field: str(random_date.year),
        month_field: str(random_date.month),
        day_field: str(random_date.day),
    }
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.11.07'
|
__version__ = '2018.12.17'
|
||||||
|
Loading…
Reference in New Issue
Block a user