mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-22 08:34:32 +01:00
[youtube] modify regex to get chapters from description (closes #24819)
This commit is contained in:
parent
00eb865b3c
commit
78d7146748
@@ -15,6 +15,41 @@ from youtube_dl.extractor import YoutubeIE
|
||||
class TestYoutubeChapters(unittest.TestCase):
|
||||
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://www.youtube.com/watch?v=gBRKnvK1JUE
|
||||
# pattern: 00:00 - 09:24 <title>
|
||||
'''Here is Nucleus's 1979 album Out Of The Long Dark: https://www.youtube.com/watch?v=GX4Eh1DPb-E<br /><br />And here is their 1971 live album: https://www.youtube.com/watch?v=cpbM75B8qaE<br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> - <a href="#" onclick="yt.www.watch.player.seekTo(09*60+24);return false;">09:24</a> roots<br /><a href="#" onclick="yt.www.watch.player.seekTo(09*60+24);return false;">09:24</a> - <a href="#" onclick="yt.www.watch.player.seekTo(14*60+19);return false;">14:19</a> images<br /><a href="#" onclick="yt.www.watch.player.seekTo(14*60+19);return false;">14:19</a> - <a href="#" onclick="yt.www.watch.player.seekTo(18*60+20);return false;">18:20</a> caliban<br /><a href="#" onclick="yt.www.watch.player.seekTo(18*60+20);return false;">18:20</a> - <a href="#" onclick="yt.www.watch.player.seekTo(21*60+42);return false;">21:42</a> whapatiti<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+42);return false;">21:42</a> - <a href="#" onclick="yt.www.watch.player.seekTo(26*60+18);return false;">26:18</a> capricorn<br /><a href="#" onclick="yt.www.watch.player.seekTo(26*60+18);return false;">26:18</a> - <a href="#" onclick="yt.www.watch.player.seekTo(29*60+42);return false;">29:42</a> odokamona<br /><a href="#" onclick="yt.www.watch.player.seekTo(29*60+42);return false;">29:42</a> - <a href="#" onclick="yt.www.watch.player.seekTo(37*60+26);return false;">37:26</a> southern roots and celebration<br /><br />Bass Guitar – Roger Sutton<br />Design – Keith Davis (3)<br />Drums – Clive Thacker<br />Engineer – Roger Wake<br />Guitar – Jocelyn Pitchen<br />Percussion – Aureo de Souza<br />Piano, Electric Piano – Dave MacRae<br />Producer – Fritz Fryer<br />Tenor Saxophone, Soprano Saxophone, Flute, Flute [Bamboo] – Brian Smith<br />Trumpet – Ian Carr<br />Vocals – Joy Yates<br />Written-By – Brian Smith (tracks: B1 to B3), Dave MacRae (tracks: B4), Ian Carr (tracks: A) ''',
|
||||
2246,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 564,
|
||||
'title': 'roots',
|
||||
}, {
|
||||
'start_time': 564,
|
||||
'end_time': 859,
|
||||
'title': 'images',
|
||||
}, {
|
||||
'start_time': 859,
|
||||
'end_time': 1100,
|
||||
'title': 'caliban',
|
||||
}, {
|
||||
'start_time': 1100,
|
||||
'end_time': 1302,
|
||||
'title': 'whapatiti',
|
||||
}, {
|
||||
'start_time': 1302,
|
||||
'end_time': 1578,
|
||||
'title': 'capricorn',
|
||||
}, {
|
||||
'start_time': 1578,
|
||||
'end_time': 1782,
|
||||
'title': 'odokamona',
|
||||
}, {
|
||||
'start_time': 1782,
|
||||
'end_time': 2246,
|
||||
'title': 'southern roots and celebration',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=A22oy8dFjqc
|
||||
# pattern: 00:00 - <title>
|
||||
|
@@ -1621,7 +1621,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not description:
|
||||
return None
|
||||
chapter_lines = re.findall(
|
||||
-r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
|
||||
+r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>(?:[^<]*<a.*?)?[^>]*)(?=$|<br\s*/>)',
|
||||
description)
|
||||
if not chapter_lines:
|
||||
return None
|
||||
|
Loading…
Reference in New Issue
Block a user