[cbsnews] Extract subtitles

Added a test case for CBS News subtitles.
This commit is contained in:
John Assael 2016-01-21 19:51:48 +00:00 committed by Sergey M․
parent 32d77eeb04
commit 4118cc02c1

View file

@ -41,6 +41,21 @@ class CBSNewsIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/',
'info_dict': {
'id': 'Mountain Lions of L.A.',
'ext': 'flv',
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
'thumbnail': 're:^http?://.*\.jpg$',
'subtitles': 're:^http?://.*\.xml$',
'duration': 787,
},
'params': {
# rtmp download
'skip_download': True,
},
},
]
def _real_extract(self, url):
@ -85,10 +100,21 @@ def _real_extract(self, url):
fmt['ext'] = 'mp4'
formats.append(fmt)
if 'mpxRefId' in video_info:
cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId']
subtitles = {
'en': [{
'url': cap_url,
'ext': 'xml'
}], }
else:
subtitles = {}
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}