[livestream] Parse SMIL (#2713)

This commit is contained in:
Philipp Hagemeister 2014-07-25 11:39:17 +02:00
parent b490b8849a
commit cbf915f3f6
2 changed files with 64 additions and 10 deletions

View file

@ -5,11 +5,14 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urlparse, compat_urlparse,
xpath_with_ns, ExtractorError,
compat_str, find_xpath_attr,
int_or_none,
orderedSet, orderedSet,
xpath_with_ns,
) )
@ -24,20 +27,71 @@ class LivestreamIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Live from Webster Hall NYC', 'title': 'Live from Webster Hall NYC',
'upload_date': '20121012', 'upload_date': '20121012',
'like_count': int,
'view_count': int,
'thumbnail': 're:^http://.*\.jpg$'
} }
} }
def _extract_video_info(self, video_data):
    """Build the info dict for one video from its metadata dict.

    Keys read from video_data:
      * required: 'id', 'caption', 'updated_at'
      * optional: 'progressive_url', 'progressive_url_hd', 'smil_url',
        'thumbnail_url', 'likes' (a dict with a 'total' entry), 'views'

    Formats are collected from the progressive URLs and, when a
    'smil_url' is given, from the <video> nodes of the downloaded SMIL
    document.

    Returns a dict with the keys 'id', 'formats', 'title', 'thumbnail',
    'upload_date', 'like_count' and 'view_count'.

    Raises ExtractorError if a SMIL document is referenced but no
    matching <switch> node can be found in it.
    """
    video_id = compat_str(video_data['id'])

    # Ordered from lowest to highest quality: the position in this tuple
    # (plus one) becomes the 'quality' value of the format.
    FORMAT_KEYS = (
        ('sd', 'progressive_url'),
        ('hd', 'progressive_url_hd'),
    )
    formats = [{
        'format_id': format_id,
        'url': video_data[key],
        'quality': i + 1,
    } for i, (format_id, key) in enumerate(FORMAT_KEYS)
        if video_data.get(key)]

    smil_url = video_data.get('smil_url')
    if smil_url:
        _SWITCH_XPATH = (
            './/{http://www.w3.org/2001/SMIL20/Language}body/'
            '{http://www.w3.org/2001/SMIL20/Language}switch')
        smil_doc = self._download_xml(
            smil_url, video_id, note='Downloading SMIL information')

        # The content of <meta name="title"> is used as the id of the
        # <switch> node to pick; without it, fall back to the first
        # <switch> in the document.
        title_node = find_xpath_attr(
            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
            'name', 'title')
        if title_node is None:
            self.report_warning('Cannot find SMIL id')
            switch_node = smil_doc.find(_SWITCH_XPATH)
        else:
            title_id = title_node.attrib['content']
            switch_node = find_xpath_attr(
                smil_doc, _SWITCH_XPATH, 'id', title_id)
        if switch_node is None:
            raise ExtractorError('Cannot find switch node')

        video_nodes = switch_node.findall(
            '{http://www.w3.org/2001/SMIL20/Language}video')
        for vn in video_nodes:
            # int_or_none is expected to give None when the attribute is
            # absent, so tbr must not be fed to '%d' unconditionally.
            tbr = int_or_none(vn.attrib.get('system-bitrate'))
            # NOTE(review): 'src' and 'clipBegin' are still treated as
            # mandatory attributes - confirm every <video> node has them.
            furl = (
                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145&seek=%s' %
                (vn.attrib['src'], vn.attrib['clipBegin']))
            formats.append({
                'url': furl,
                'format_id': ('smil_%d' % tbr) if tbr is not None else 'smil',
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,
            })
    self._sort_formats(formats)

    # 'likes' may be missing or explicitly null; treat both as "no data".
    likes = video_data.get('likes') or {}

    return {
        'id': video_id,
        'formats': formats,
        'title': video_data['caption'],
        'thumbnail': video_data.get('thumbnail_url'),
        'upload_date': video_data['updated_at'].replace('-', '')[:8],
        'like_count': likes.get('total'),
        'view_count': video_data.get('views'),
    }
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -242,8 +242,8 @@ def write_json_file(obj, fn):
if sys.version_info >= (2,7): if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Find the first element matching xpath[@key='val'] below node.

    key and val are spliced verbatim into the path expression, so both
    are first checked against a whitelist of characters; anything that
    could end the quoted value or the predicate early is rejected.

    Returns the result of node.find() for the built expression, which is
    None when no element matches.

    Raises AssertionError if key or val contains a character outside the
    whitelist.
    """
    # Explicit raises rather than `assert` statements: these checks guard
    # the expression built below and must not vanish under `python -O`.
    # The exception type is kept so existing callers see no difference.
    if not re.match(r'^[a-zA-Z-]+$', key):
        raise AssertionError('Invalid attribute name: %r' % (key,))
    if not re.match(r'^[a-zA-Z0-9@\s:._-]*$', val):
        raise AssertionError('Invalid attribute value: %r' % (val,))
    expr = xpath + u"[@%s='%s']" % (key, val)
    return node.find(expr)
else: else: