diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 058eb4321..7607e0e03 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.07**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.10.05
+[debug] youtube-dl version 2018.11.07
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 333acee80..bbcb78808 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -296,5 +296,26 @@ title = self._search_regex(
### Use safe conversion functions
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
diff --git a/ChangeLog b/ChangeLog
index 86cf489b1..fa5de8b04 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,61 @@
+version 2018.11.07
+
+Extractors
++ [youtube] Add another JS signature function name regex (#18091, #18093,
+ #18094)
+* [facebook] Fix tahoe request (#17171)
+* [cliphunter] Fix extraction (#18083)
++ [youtube:playlist] Add support for invidio.us (#18077)
+* [zattoo] Arrange API hosts for derived extractors (#18035)
++ [youtube] Add fallback metadata extraction from videoDetails (#18052)
+
+
+version 2018.11.03
+
+Core
+* [extractor/common] Ensure response handle is not prematurely closed before
+ it can be read if it matches expected_status (#17195, #17846, #17447)
+
+Extractors
+* [laola1tv:embed] Set correct stream access URL scheme (#16341)
++ [ehftv] Add support for ehftv.com (#15408)
+* [azmedien] Adapt to major site redesign (#17745, #17746)
++ [twitcasting] Add support for twitcasting.tv (#17981)
+* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
++ [openload] Add support for oload.fun (#18045)
+* [njpwworld] Fix authentication (#17427)
++ [linkedin:learning] Add support for linkedin.com/learning (#13545)
+* [theplatform] Improve error detection (#13222)
+* [cnbc] Simplify extraction (#14280, #17110)
++ [cnbc] Add support for new URL schema (#14193)
+* [aparat] Improve extraction and extract more metadata (#17445, #18008)
+* [aparat] Fix extraction
+
+
+version 2018.10.29
+
+Core
++ [extractor/common] Add validation for JSON-LD URLs
+
+Extractors
++ [sportbox] Add support for matchtv.ru
+* [sportbox] Fix extraction (#17978)
+* [screencast] Fix extraction (#14590, #14617, #17990)
++ [openload] Add support for oload.icu
++ [ivi] Add support for ivi.tv
+* [crunchyroll] Improve extraction failsafeness (#17991)
+* [dailymail] Fix formats extraction (#17976)
+* [viewster] Reduce format requests
+* [cwtv] Handle API errors (#17905)
++ [rutube] Use geo verification headers (#17897)
++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
+- [tv3] Remove extractor (#10461, #15339)
+* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
++ [openload] Add support for oload.cc (#17823)
++ [patreon] Extract post_file URL (#17792)
+* [patreon] Fix extraction (#14502, #10471)
+
+
version 2018.10.05
Extractors
diff --git a/README.md b/README.md
index fdd115c9b..35c3de512 100644
--- a/README.md
+++ b/README.md
@@ -1168,7 +1168,28 @@ title = self._search_regex(
### Use safe conversion functions
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
# EMBEDDING YOUTUBE-DL
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f167a6ddc..24c3254c3 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -84,8 +84,6 @@
- **awaan:season**
- **awaan:video**
- **AZMedien**: AZ Medien videos
- - **AZMedienPlaylist**: AZ Medien playlists
- - **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频
- **bambuser**
- **bambuser:channel**
@@ -178,6 +176,7 @@
- **Clyp**
- **cmt.com**
- **CNBC**
+ - **CNBCVideo**
- **CNN**
- **CNNArticle**
- **CNNBlogs**
@@ -251,6 +250,7 @@
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
+ - **ehftv**
- **eHow**
- **EinsUndEinsTV**
- **Einthusan**
@@ -445,6 +445,8 @@
- **limelight:channel**
- **limelight:channel_list**
- **LineTV**
+ - **linkedin:learning**
+ - **linkedin:learning:course**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
@@ -818,7 +820,7 @@
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- - **SportBoxEmbed**
+ - **SportBox**
- **SportDeutschland**
- **SpringboardPlatform**
- **Sprout**
@@ -909,7 +911,6 @@
- **TV2**
- **tv2.hu**
- **TV2Article**
- - **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
@@ -931,6 +932,7 @@
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
+ - **TwitCasting**
- **twitch:chapter**
- **twitch:clips**
- **twitch:profile**
diff --git a/test/helper.py b/test/helper.py
index dfee217a9..aa9a1c9b2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -7,6 +7,7 @@ import json
import os.path
import re
import types
+import ssl
import sys
import youtube_dl.extractor
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w)
ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4833396a5..06be72616 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -9,11 +9,30 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "
418 I'm a teapot
"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
class TestIE(InfoExtractor):
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i])
+ def test_response_with_expected_status_returns_content(self):
+ # Checks for mitigations against the effects of
+ # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+ # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+ # or the underlying `_download_webpage_handle` returning no content
+ # when a response matches `expected_status`.
+
+ httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 5cf2bf1a5..750472281 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,26 +9,16 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
-import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
TEST_SIZE = 10 * 1024
diff --git a/test/test_http.py b/test/test_http.py
index 409fec9c8..3ee0a5dda 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import http_server_port
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
@@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index 6eb8bbb6e..883dcee7a 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ merge_dicts,
mimetype2ext,
url_or_none,
)
@@ -12,59 +13,83 @@ from ..utils import (
class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
'info_dict': {
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
- 'age_limit': 0,
+ 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+ 'duration': 231,
+ 'timestamp': 1387394859,
+ 'upload_date': '20131218',
+ 'view_count': int,
},
- # 'skip': 'Extremely unreliable',
- }
+ }, {
+ # multiple formats
+ 'url': 'https://www.aparat.com/v/8dflw/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- # Note: There is an easier-to-parse configuration at
- # http://www.aparat.com/video/video/config/videohash/%video_id
- # but the URL in there does not work
- webpage = self._download_webpage(
- 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
- video_id)
+ # Provides more metadata
+ webpage = self._download_webpage(url, video_id, fatal=False)
- title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+ if not webpage:
+ # Note: There is an easier-to-parse configuration at
+ # http://www.aparat.com/video/video/config/videohash/%video_id
+ # but the URL in there does not work
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id)
- file_list = self._parse_json(
+ options = self._parse_json(
self._search_regex(
- r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
- 'file list'),
+ r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P(?:(?!\1).)+)\1\s*\)',
+ webpage, 'options', group='value'),
video_id)
+ player = options['plugins']['sabaPlayerPlugin']
+
formats = []
- for item in file_list[0]:
- file_url = url_or_none(item.get('file'))
- if not file_url:
- continue
- ext = mimetype2ext(item.get('type'))
- label = item.get('label')
- formats.append({
- 'url': file_url,
- 'ext': ext,
- 'format_id': label or ext,
- 'height': int_or_none(self._search_regex(
- r'(\d+)[pP]', label or '', 'height', default=None)),
- })
- self._sort_formats(formats)
+ for sources in player['multiSRC']:
+ for item in sources:
+ if not isinstance(item, dict):
+ continue
+ file_url = url_or_none(item.get('src'))
+ if not file_url:
+ continue
+ item_type = item.get('type')
+ if item_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ file_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': 'http-%s' % (label or ext),
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height',
+ default=None)),
+ })
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
- thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ info = self._search_json_ld(webpage, video_id, default={})
- return {
+ if not info.get('title'):
+ info['title'] = player['title']
+
+ return merge_dicts(info, {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'age_limit': self._family_friendly_search(webpage),
+ 'thumbnail': url_or_none(options.get('poster')),
+ 'duration': int_or_none(player.get('duration')),
'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index 68f26e2ca..a57a5f114 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -1,213 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
-from ..utils import (
- get_element_by_class,
- get_element_by_id,
- strip_or_none,
- urljoin,
-)
-class AZMedienBaseIE(InfoExtractor):
- def _kaltura_video(self, partner_id, entry_id):
- return self.url_result(
- 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
- video_id=entry_id)
-
-
-class AZMedienIE(AZMedienBaseIE):
+class AZMedienIE(InfoExtractor):
IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
- (?:
+ (?P
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
- [0-9]+-show-[^/\#]+
- (?:
- /[0-9]+-episode-[^/\#]+
- (?:
- /[0-9]+-segment-(?:[^/\#]+\#)?|
- \#
- )|
- \#
+ [^/]+/
+ (?P
+ [^/]+-(?P\d+)
)
- (?P[^\#]+)
+ (?:
+ \#video=
+ (?P
+ [_0-9a-z]+
+ )
+ )?
'''
_TESTS = [{
- # URL with 'segment'
- 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+ 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'info_dict': {
- 'id': '1_2444peh4',
+ 'id': '1_anruz3wy',
'ext': 'mp4',
- 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
- 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
- 'uploader_id': 'TeleZ?ri',
- 'upload_date': '20161218',
- 'timestamp': 1482084490,
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
},
'params': {
'skip_download': True,
},
}, {
- # URL with 'segment' and fragment:
- 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
- 'only_matching': True
- }, {
- # URL with 'episode' and fragment:
- 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
- 'only_matching': True
- }, {
- # URL with 'show' and fragment:
- 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- partner_id = self._search_regex(
- r'