[ie/facebook] Improve extraction

Partially addresses #4311

Authored by: jingtra, ringus1

Co-authored-by: Jing Kjeldsen <jingtra@gmail.com>
This commit is contained in:
ringus1 2024-02-15 13:46:57 -06:00 committed by bashonly
parent beaa1a4455
commit 2e30b5567b
No known key found for this signature in database
GPG key ID: 783F096F253D15B0

View file

@@ -500,6 +500,7 @@ def extract_metadata(webpage):
  webpage, 'description', default=None)
  uploader_data = (
  get_first(media, ('owner', {dict}))
+ or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
  or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
  or get_first(post, ('node', 'actors', ..., {dict}))
  or get_first(post, ('event', 'event_creator', {dict})) or {})
@@ -583,8 +584,8 @@ def extract_relay_data(_filter):
  def extract_relay_prefetched_data(_filter):
  return traverse_obj(extract_relay_data(_filter), (
  'require', (None, (..., ..., ..., '__bbox', 'require')),
- lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
- '__bbox', 'result', 'data', {dict}), get_all=False) or {}
+ lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
+ ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
  if not video_data:
  server_js_data = self._parse_json(self._search_regex([