[go,viu] Extract subtitles from the m3u8 manifest (#3219)

Authored by: fstirlitz
This commit is contained in:
Felix S 2022-03-27 09:35:14 +00:00 committed by GitHub
parent 4628a3aa75
commit 47b8bf207b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 5 deletions

View file

@@ -217,6 +217,7 @@ def _real_extract(self, url):
title = video_data['title'] title = video_data['title']
formats = [] formats = []
subtitles = {}
for asset in video_data.get('assets', {}).get('asset', []): for asset in video_data.get('assets', {}).get('asset', []):
asset_url = asset.get('value') asset_url = asset.get('value')
if not asset_url: if not asset_url:
@@ -256,8 +257,10 @@ def _real_extract(self, url):
error_message = ', '.join([error['message'] for error in errors]) error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey'] asset_url += '?' + entitlement['uplynkData']['sessionKey']
formats.extend(self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else: else:
f = { f = {
'format_id': format_id, 'format_id': format_id,
@@ -281,7 +284,6 @@ def _real_extract(self, url):
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for cc in video_data.get('closedcaption', {}).get('src', []): for cc in video_data.get('closedcaption', {}).get('src', []):
cc_url = cc.get('value') cc_url = cc.get('value')
if not cc_url: if not cc_url:

View file

@@ -88,10 +88,9 @@ def _real_extract(self, url):
# r'(/hlsc_)[a-z]+(\d+\.m3u8)', # r'(/hlsc_)[a-z]+(\d+\.m3u8)',
# r'\1whe\2', video_data['href']) # r'\1whe\2', video_data['href'])
m3u8_url = video_data['href'] m3u8_url = video_data['href']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for key, value in video_data.items(): for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key) mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
if not mobj: if not mobj: