From 1ac7f461845b3f9c0c3a2e6a1308bf82d3e8e55a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 1 Sep 2022 16:23:18 +0530 Subject: [PATCH] Update to ytdl-commit-ed5c44e7 [compat] Replace deficient ChainMap class in Py3.3 and earlier https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678 --- README.md | 2 +- test/test_jsinterp.py | 35 +++++++++++++++++ yt_dlp/YoutubeDL.py | 8 ++-- yt_dlp/jsinterp.py | 91 ++++++++++++++++++++++++------------------- 4 files changed, 92 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 176832ca9..c4667bb57 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ # NEW FEATURES -* Merged with **youtube-dl v2021.12.17+ [commit/b0a60ce](https://github.com/ytdl-org/youtube-dl/commit/b0a60ce2032172aeaaf27fe3866ab72768f10cb2)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 0cdf726fb..b46d0949d 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -71,6 +71,9 @@ def test_operators(self): jsi = JSInterpreter('function 
f(){return 0 ?? 42;}') self.assertEqual(jsi.call_function('f'), 0) + jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}') + self.assertFalse(jsi.call_function('f')) + def test_array_access(self): jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) @@ -193,6 +196,30 @@ def test_try(self): ''') self.assertEqual(jsi.call_function('x'), 10) + def test_catch(self): + jsi = JSInterpreter(''' + function x() { try{throw 10} catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 5) + + def test_finally(self): + jsi = JSInterpreter(''' + function x() { try{throw 10} finally {return 42} } + ''') + self.assertEqual(jsi.call_function('x'), 42) + jsi = JSInterpreter(''' + function x() { try{throw 10} catch(e){return 5} finally {return 42} } + ''') + self.assertEqual(jsi.call_function('x'), 42) + + def test_nested_try(self): + jsi = JSInterpreter(''' + function x() {try { + try{throw 10} finally {throw 42} + } catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 5) + def test_for_loop_continue(self): jsi = JSInterpreter(''' function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a } @@ -205,6 +232,14 @@ def test_for_loop_break(self): ''') self.assertEqual(jsi.call_function('x'), 0) + def test_for_loop_try(self): + jsi = JSInterpreter(''' + function x() { + for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; + return 42 } + ''') + self.assertEqual(jsi.call_function('x'), 42) + def test_literal_list(self): jsi = JSInterpreter(''' function x() { return [1, 2, "asdf", [5, 6, 7]][3] } diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 10c17ea00..2b5b3fdfc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2528,9 +2528,6 @@ def sanitize_numeric_fields(info): '--live-from-start is passed, but there are no formats that can be downloaded from the start. 
' 'If you want to download from the current time, use --no-live-from-start')) - if not formats: - self.raise_no_formats(info_dict) - def is_wellformed(f): url = f.get('url') if not url: @@ -2543,7 +2540,10 @@ def is_wellformed(f): return True # Filter out malformed formats for better extraction robustness - formats = list(filter(is_wellformed, formats)) + formats = list(filter(is_wellformed, formats or [])) + + if not formats: + self.raise_no_formats(info_dict) formats_dict = {} diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 27d7f0dfa..2bb4acf3e 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -72,6 +72,8 @@ def _js_comp_op(op): def wrapped(a, b): if JS_Undefined in (a, b): return False + if isinstance(a, str) or isinstance(b, str): + return op(str(a or 0), str(b or 0)) return op(a or 0, b or 0) return wrapped @@ -268,7 +270,9 @@ def _separate(expr, delim=',', max_split=None): yield expr[start:] @classmethod - def _separate_at_paren(cls, expr, delim): + def _separate_at_paren(cls, expr, delim=None): + if delim is None: + delim = expr and _MATCHING_PARENS[expr[0]] separated = list(cls._separate(expr, delim, 1)) if len(separated) < 2: raise cls.Exception(f'No terminating paren {delim}', expr) @@ -347,7 +351,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): if expr.startswith('new '): obj = expr[4:] if obj.startswith('Date('): - left, right = self._separate_at_paren(obj[4:], ')') + left, right = self._separate_at_paren(obj[4:]) expr = unified_timestamp( self.interpret_expression(left, local_vars, allow_recursion), False) if not expr: @@ -361,8 +365,8 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): return None, should_return if expr.startswith('{'): - inner, outer = self._separate_at_paren(expr, '}') - # Look for Map first + inner, outer = self._separate_at_paren(expr) + # try for object expression (Map) sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in 
self._separate(inner)] if all(len(sub_expr) == 2 for sub_expr in sub_expressions): def dict_item(key, val): @@ -380,7 +384,7 @@ def dict_item(key, val): expr = self._dump(inner, local_vars) + outer if expr.startswith('('): - inner, outer = self._separate_at_paren(expr, ')') + inner, outer = self._separate_at_paren(expr) inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) if not outer or should_abort: return inner, should_abort or should_return @@ -388,53 +392,62 @@ def dict_item(key, val): expr = self._dump(inner, local_vars) + outer if expr.startswith('['): - inner, outer = self._separate_at_paren(expr, ']') + inner, outer = self._separate_at_paren(expr) name = self._named_object(local_vars, [ self.interpret_expression(item, local_vars, allow_recursion) for item in self._separate(inner)]) expr = name + outer - m = re.match(rf'''(?x) - (?P<try>try|finally)\s*| - (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))| - (?P<switch>switch)\s*\(| - (?P<for>for)\s*\(|''', expr) - if m and m.group('try'): - if expr[m.end()] == '{': - try_expr, expr = self._separate_at_paren(expr[m.end():], '}') - else: - try_expr, expr = expr[m.end() - 1:], '' + m = re.match(r'''(?x) + (?P<try>try)\s*\{| + (?P<switch>switch)\s*\(| + (?P<for>for)\s*\( + ''', expr) + md = m.groupdict() if m else {} + if md.get('try'): + try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + err = None try: ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion) if should_abort: return ret, True - except JS_Throw as e: - local_vars[self._EXC_NAME] = e.error except Exception as e: # XXX: This works for now, but makes debugging future issues very hard - local_vars[self._EXC_NAME] = e - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return + err = e - elif m and m.group('catch'): - catch_expr, expr = self._separate_at_paren(expr[m.end():], '}') - if self._EXC_NAME in local_vars: - catch_vars = 
local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)}) - ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion) + pending = (None, False) + m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + if err: + catch_vars = {} + if m.group('err'): + catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err + catch_vars = local_vars.new_child(catch_vars) + err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion) + + m = re.match(r'finally\s*\{', expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) if should_abort: return ret, True - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return + ret, should_abort = pending + if should_abort: + return ret, True - elif m and m.group('for'): - constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') + if err: + raise err + + elif md.get('for'): + constructor, remaining = self._separate_at_paren(expr[m.end() - 1:]) if remaining.startswith('{'): - body, expr = self._separate_at_paren(remaining, '}') + body, expr = self._separate_at_paren(remaining) else: switch_m = re.match(r'switch\s*\(', remaining) # FIXME if switch_m: - switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')') + switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:]) body, expr = self._separate_at_paren(remaining, '}') body = 'switch(%s){%s}' % (switch_val, body) else: @@ -453,11 +466,9 @@ def dict_item(key, val): except JS_Continue: pass self.interpret_expression(increment, local_vars, allow_recursion) - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return - elif 
m and m.group('switch'): - switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') + elif md.get('switch'): + switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:]) switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) body, expr = self._separate_at_paren(remaining, '}') items = body.replace('default:', 'case default:').split('case ')[1:] @@ -480,6 +491,8 @@ def dict_item(key, val): break if matched: break + + if md: ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) return ret, should_abort or should_return @@ -584,7 +597,7 @@ def dict_item(key, val): member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion) arg_str = expr[m.end():] if arg_str.startswith('('): - arg_str, remaining = self._separate_at_paren(arg_str, ')') + arg_str, remaining = self._separate_at_paren(arg_str) else: arg_str, remaining = None, arg_str @@ -769,7 +782,7 @@ def extract_function_code(self, funcname): \((?P<args>[^)]*)\)\s* (?P<code>{.+})''' % {'name': re.escape(funcname)}, self.code) - code, _ = self._separate_at_paren(func_m.group('code'), '}') + code, _ = self._separate_at_paren(func_m.group('code')) if func_m is None: raise self.Exception(f'Could not find JS function "{funcname}"') return [x.strip() for x in func_m.group('args').split(',')], code @@ -784,7 +797,7 @@ def extract_function_from_code(self, argnames, code, *global_stack): if mobj is None: break start, body_start = mobj.span() - body, remaining = self._separate_at_paren(code[body_start - 1:], '}') + body, remaining = self._separate_at_paren(code[body_start - 1:]) name = self._named_object(local_vars, self.extract_function_from_code( [x.strip() for x in mobj.group('args').split(',')], body, local_vars, *global_stack))