Merge remote-tracking branch 'aajanki/wdr_live'

This commit is contained in:
Philipp Hagemeister 2015-02-26 01:34:01 +01:00
commit feccc3ff37
3 changed files with 155 additions and 51 deletions

View file

@ -11,6 +11,7 @@
from .http import HttpFD
from ..compat import (
compat_urlparse,
compat_urllib_error,
)
from ..utils import (
struct_pack,
@ -121,7 +122,8 @@ def read_abst(self):
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
flags = self.read_unsigned_char()
live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
@ -160,6 +162,7 @@ def read_abst(self):
return {
'segments': segments,
'fragments': fragments,
'live': live,
}
def read_bootstrap_info(self):
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
if boot_info['live']:
res = res[-2:]
return res
@ -246,6 +253,38 @@ def _get_unencrypted_media(self, doc):
self.report_error('Unsupported DRM')
return media
def _get_bootstrap_from_url(self, bootstrap_url):
bootstrap = self.ydl.urlopen(bootstrap_url).read()
return read_bootstrap_info(bootstrap)
def _update_live_fragments(self, bootstrap_url, latest_fragment):
fragments_list = []
retries = 30
while (not fragments_list) and (retries > 0):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
fragments_list = build_fragments_list(boot_info)
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
if not fragments_list:
# Retry after a while
time.sleep(5.0)
retries -= 1
if not fragments_list:
self.report_error('Failed to update fragments')
return fragments_list
def _parse_bootstrap_node(self, node, base_url):
if node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, node.attrib['url'])
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url)
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
@ -265,18 +304,13 @@ def real_download(self, filename, info_dict):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
if bootstrap_node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, bootstrap_node.attrib['url'])
bootstrap = self.ydl.urlopen(bootstrap_url).read()
else:
bootstrap = base64.b64decode(bootstrap_node.text)
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
@ -301,7 +335,8 @@ def real_download(self, filename, info_dict):
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
write_flv_header(dest_stream)
write_metadata_tag(dest_stream, metadata)
if not live:
write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
@ -348,24 +383,45 @@ def frag_progress_hook(s):
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
for (seg_i, frag_i) in fragments_list:
while fragments_list:
seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
frags_filenames.append(frag_filename)
try:
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
if live:
os.remove(frag_filename)
else:
frags_filenames.append(frag_filename)
except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i
self.report_warning(msg)
fragments_list = []
else:
raise
if not fragments_list and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
dest_stream.close()

View file

@ -921,39 +921,57 @@ def _extract_smil_formats(self, smil_url, video_id, fatal=True):
formats = []
rtmp_count = 0
for video in smil.findall('./body/switch/video'):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
formats.append({
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
})
if smil.findall('./body/seq/video'):
video = smil.findall('./body/seq/video')[0]
fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
formats.extend(fmts)
else:
for video in smil.findall('./body/switch/video'):
fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
formats.extend(fmts)
self._sort_formats(formats)
return formats
def _parse_smil_video(self, video, base, rtmp_count):
src = video.get('src')
if not src:
return ([], rtmp_count)
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
return ([{
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
elif proto.startswith('http'):
return ([{
'url': base + src,
'ext': ext or 'flv',
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()

View file

@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310',
'is_live': False
},
'params': {
'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311',
'is_live': False
},
'params': {
'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129',
'is_live': False
},
},
{
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717',
'is_live': False
},
},
{
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
},
{
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
'upload_date': '20150212',
'is_live': True
},
'params': {
'skip_download': True,
},
}
]
@ -119,6 +137,10 @@ def _real_extract(self, url):
video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
is_live = flashvars.get('isLive', ['0'])[0] == '1'
if is_live:
title = self._live_title(title)
if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ def _real_extract(self, url):
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
elif video_url.endswith('.smil'):
fmt = self._extract_smil_formats(video_url, page_id)[0]
video_url = fmt['url']
sep = '&' if '?' in video_url else '?'
video_url += sep
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
ext = fmt['ext']
else:
ext = determine_ext(video_url)
@ -144,6 +173,7 @@ def _real_extract(self, url):
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'is_live': is_live
}