From 3574d22de917edec9ca9d65a81f2f329bf9633d4 Mon Sep 17 00:00:00 2001 From: David Sn Date: Sat, 28 Mar 2020 19:42:42 +0100 Subject: [PATCH 1/3] [cda] Try to fix and improve extraction (fixes #24458) Signed-off-by: David Sn --- youtube_dl/extractor/cda.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 0c3af23d5..b48501794 100644 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -5,6 +5,7 @@ import codecs import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, float_or_none, @@ -123,6 +124,19 @@ class CDAIE(InfoExtractor): 'age_limit': 18 if need_confirm_age else 0, } + def decrypt_file(file): + b = [] + + for ch in file: + f = ord(ch) + b.append(chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else chr(f)) + + return "".join(b) + + def decode(file): + decoded = codecs.decode(codecs.decode(file, "rot_13"), "rot_13") + return "https://" + decrypt_file(compat_urllib_parse_unquote(decoded)) + ".mp4" + def extract_format(page, version): json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, @@ -137,10 +151,9 @@ class CDAIE(InfoExtractor): if not video or 'file' not in video: self.report_warning('Unable to extract %s version information' % version) return - if video['file'].startswith('uggc'): - video['file'] = codecs.decode(video['file'], 'rot_13') - if video['file'].endswith('adc.mp4'): - video['file'] = video['file'].replace('adc.mp4', '.mp4') + video['file'] = decode(video['file']) + if video['file'].endswith('adc.mp4'): + video['file'] = video['file'].replace('adc.mp4', '.mp4') f = { 'url': video['file'], } From fd76f28c6c9e91056a3196b549a2e8ed4a6ebd82 Mon Sep 17 00:00:00 2001 From: David Sn Date: Sun, 29 Mar 2020 12:25:53 +0200 Subject: [PATCH 2/3] [cda] Improve extraction and keep old method for legacy It is unknown whether CDA keeps using the 
old ROT13 decode method, as the original player.js code still contains the old method. Signed-off-by: David Sn --- youtube_dl/extractor/cda.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index b48501794..4b081a6f4 100644 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -133,10 +133,6 @@ class CDAIE(InfoExtractor): return "".join(b) - def decode(file): - decoded = codecs.decode(codecs.decode(file, "rot_13"), "rot_13") - return "https://" + decrypt_file(compat_urllib_parse_unquote(decoded)) + ".mp4" - def extract_format(page, version): json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, @@ -151,9 +147,14 @@ class CDAIE(InfoExtractor): if not video or 'file' not in video: self.report_warning('Unable to extract %s version information' % version) return - video['file'] = decode(video['file']) - if video['file'].endswith('adc.mp4'): - video['file'] = video['file'].replace('adc.mp4', '.mp4') + if "http" not in video['file'] and ".mp4" not in video['file'] and "uggcf://" not in video['file']: + video['file'] = decrypt_file(compat_urllib_parse_unquote(video['file'])) + if not video['file'].startswith("http"): + video['file'] = "https://" + video['file'] + ".mp4" + elif video['file'].startswith('uggc'): + video['file'] = codecs.decode(video['file'], 'rot_13') + if video['file'].endswith('adc.mp4'): + video['file'] = video['file'].replace('adc.mp4', '.mp4') f = { 'url': video['file'], } From c2eafa71573f7ba4cd4c9dd0657a4f74eed63ac3 Mon Sep 17 00:00:00 2001 From: David Sn Date: Sat, 4 Apr 2020 16:37:46 +0200 Subject: [PATCH 3/3] [cda] Fix url extraction after recent player.js update Signed-off-by: David Sn --- youtube_dl/extractor/cda.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 4b081a6f4..6e462f624 100644 --- 
a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -124,14 +124,27 @@ class CDAIE(InfoExtractor): 'age_limit': 18 if need_confirm_age else 0, } - def decrypt_file(file): + def decrypt_file(a): + # first replace very cringy joke, then apply decodeURIComponent + a = compat_urllib_parse_unquote(a.replace("_XDDD", "")) + + # store decrypted characters b = [] - for ch in file: - f = ord(ch) + for e in range(len(a)): + f = ord(a[e]) b.append(chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else chr(f)) - return "".join(b) + # decrypted URL + a = "".join(b) + + # more "obfuscation" to deal with + a = a.replace(".cda.mp4", "") + a = a.replace(".2cda.pl", ".cda.pl") + a = a.replace(".3cda.pl", ".cda.pl") + + # return extracted file as URL to video file + return "https://" + a + ".mp4" def extract_format(page, version): json_str = self._html_search_regex( @@ -148,9 +161,7 @@ class CDAIE(InfoExtractor): self.report_warning('Unable to extract %s version information' % version) return if "http" not in video['file'] and ".mp4" not in video['file'] and "uggcf://" not in video['file']: - video['file'] = decrypt_file(compat_urllib_parse_unquote(video['file'])) - if not video['file'].startswith("http"): - video['file'] = "https://" + video['file'] + ".mp4" + video['file'] = decrypt_file(video['file']) elif video['file'].startswith('uggc'): video['file'] = codecs.decode(video['file'], 'rot_13') if video['file'].endswith('adc.mp4'):