[jsinterp, extractor/youtube] Minor fixes

This commit is contained in:
pukkandan 2022-08-30 17:23:59 +05:30
parent 5135ed3d4a
commit d81ba7d491
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
5 changed files with 30 additions and 14 deletions

View file

@ -129,6 +129,11 @@ def test_precedence(self):
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
def test_builtins(self): def test_builtins(self):
jsi = JSInterpreter('''
function x() { return NaN }
''')
self.assertTrue(math.isnan(jsi.call_function('x')))
jsi = JSInterpreter(''' jsi = JSInterpreter('''
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
''') ''')

View file

@ -51,15 +51,15 @@ def store(self, section, key, data, dtype='json'):
tb = traceback.format_exc() tb = traceback.format_exc()
self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}') self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
def _validate(self, data, after): def _validate(self, data, min_ver):
version = traverse_obj(data, 'yt-dlp_version') version = traverse_obj(data, 'yt-dlp_version')
if not version: # Backward compatibility if not version: # Backward compatibility
data, version = {'data': data}, '2022.08.19' data, version = {'data': data}, '2022.08.19'
if not after or version_tuple(version) > version_tuple(after): if not min_ver or version_tuple(version) >= version_tuple(min_ver):
return data['data'] return data['data']
self._ydl.write_debug(f'Discarding old cache from version {version} (need {after})') self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
def load(self, section, key, dtype='json', default=None, *, after=None): def load(self, section, key, dtype='json', default=None, *, min_ver=None):
assert dtype in ('json',) assert dtype in ('json',)
if not self.enabled: if not self.enabled:
@ -70,7 +70,7 @@ def load(self, section, key, dtype='json', default=None, *, after=None):
try: try:
with open(cache_fn, encoding='utf-8') as cachef: with open(cache_fn, encoding='utf-8') as cachef:
self._ydl.write_debug(f'Loading {section}.{key} from cache') self._ydl.write_debug(f'Loading {section}.{key} from cache')
return self._validate(json.load(cachef), after) return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError): except (ValueError, KeyError):
try: try:
file_size = os.path.getsize(cache_fn) file_size = os.path.getsize(cache_fn)

View file

@ -52,6 +52,8 @@ class PhantomJSwrapper:
This class is experimental. This class is experimental.
""" """
INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html'
_BASE_JS = R''' _BASE_JS = R'''
phantom.onError = function(msg, trace) {{ phantom.onError = function(msg, trace) {{
var msgStack = ['PHANTOM ERROR: ' + msg]; var msgStack = ['PHANTOM ERROR: ' + msg];
@ -110,8 +112,7 @@ def __init__(self, extractor, required_version=None, timeout=10000):
self.exe = check_executable('phantomjs', ['-v']) self.exe = check_executable('phantomjs', ['-v'])
if not self.exe: if not self.exe:
raise ExtractorError( raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
'PhantomJS not found, Please download it from https://phantomjs.org/download.html', expected=True)
self.extractor = extractor self.extractor = extractor
@ -237,6 +238,6 @@ def execute(self, jscode, video_id=None, *, note='Executing JS'):
except Exception as e: except Exception as e:
raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e) raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
if returncode: if returncode:
raise ExtractorError(f'{note} failed:\n{stderr.strip()}') raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
return stdout return stdout

View file

@ -2670,7 +2670,7 @@ def _extract_n_function_name(self, jscode):
def _extract_n_function_code(self, video_id, player_url): def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
func_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19.1') func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.08.19.2')
jscode = func_code or self._load_player(video_id, player_url) jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode) jsi = JSInterpreter(jscode)
@ -3282,7 +3282,8 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i
except ExtractorError as e: except ExtractorError as e:
phantomjs_hint = '' phantomjs_hint = ''
if isinstance(e, JSInterpreter.Exception): if isinstance(e, JSInterpreter.Exception):
phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n' phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
self.report_warning( self.report_warning(
f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}' f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True) f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)

View file

@ -172,7 +172,14 @@ def wrap_interpreter(cls, f):
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs): def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
if cls.ENABLED and stmt.strip(): if cls.ENABLED and stmt.strip():
cls.write(stmt, level=allow_recursion) cls.write(stmt, level=allow_recursion)
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs) try:
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
except Exception as e:
if cls.ENABLED:
if isinstance(e, ExtractorError):
e = e.orig_msg
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
raise
if cls.ENABLED and stmt.strip(): if cls.ENABLED and stmt.strip():
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion) cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret return ret, should_ret
@ -226,7 +233,7 @@ def _regex_flags(cls, expr):
@staticmethod @staticmethod
def _separate(expr, delim=',', max_split=None): def _separate(expr, delim=',', max_split=None):
OP_CHARS = '+-*/%&|^=<>!,;{}()[]:' OP_CHARS = '+-*/%&|^=<>!,;{}:'
if not expr: if not expr:
return return
counters = {k: 0 for k in _MATCHING_PARENS.values()} counters = {k: 0 for k in _MATCHING_PARENS.values()}
@ -504,7 +511,7 @@ def dict_item(key, val):
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})? (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
=(?!=)(?P<expr>.*)$ =(?!=)(?P<expr>.*)$
)|(?P<return> )|(?P<return>
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$ (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
)|(?P<indexing> )|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<attribute> )|(?P<attribute>
@ -539,6 +546,8 @@ def dict_item(key, val):
raise JS_Continue() raise JS_Continue()
elif expr == 'undefined': elif expr == 'undefined':
return JS_Undefined, should_return return JS_Undefined, should_return
elif expr == 'NaN':
return float('NaN'), should_return
elif m and m.group('return'): elif m and m.group('return'):
return local_vars.get(m.group('name'), JS_Undefined), should_return return local_vars.get(m.group('name'), JS_Undefined), should_return
@ -784,7 +793,7 @@ def resf(args, kwargs={}, allow_recursion=100):
global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None)) global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
global_stack[0].update(kwargs) global_stack[0].update(kwargs)
var_stack = LocalNameSpace(*global_stack) var_stack = LocalNameSpace(*global_stack)
ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1) ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
if should_abort: if should_abort:
return ret return ret
return resf return resf