#70 Allow downloading of unplayable video formats

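Video postprocessors are also turned off when this option is used: merging of multiple formats, --extract-audio, --remux-video, --recode-video, --add-metadata, --embed-subs, --embed-thumbnail, --xattrs, --fixup and --sponskrub are all ignored (with a warning) so that the unplayable data is never modified.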

Co-authored-by: shirtjs <2660574+shirtjs@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan@gmail.com>
This commit is contained in:
shirt-dev 2021-02-11 22:51:59 -05:00 committed by GitHub
parent 584bab3766
commit 63ad4d43eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 106 additions and 47 deletions

View file

@ -537,6 +537,11 @@ ## Video Format Options:
bestvideo+bestaudio), output to given
container format. One of mkv, mp4, ogg,
webm, flv. Ignored if no merge is required
--allow-unplayable-formats Allow unplayable formats to be listed and
downloaded. All video postprocessing will
also be turned off
--no-allow-unplayable-formats Do not allow unplayable formats to be
listed or downloaded (default)
## Subtitle Options:
--write-subs Write subtitle file

View file

@ -179,6 +179,7 @@ class YoutubeDL(object):
of 'skip_download' or 'simulate'.
simulate: Do not download the video files.
format: Video format code. see "FORMAT SELECTION" for more details.
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
format_sort: How to sort the video formats. see "Sorting Formats"
for more details.
format_sort_force: Force the given format_sort. see "Sorting Formats"
@ -2291,10 +2292,15 @@ def existing_file(*filepaths):
if info_dict.get('requested_formats') is not None:
downloaded = []
merger = FFmpegMergerPP(self)
if not merger.available:
self.report_warning('You have requested multiple '
'formats but ffmpeg is not installed.'
' The formats won\'t be merged.')
if self.params.get('allow_unplayable_formats'):
self.report_warning(
'You have requested merging of multiple formats '
'while also allowing unplayable formats to be downloaded. '
'The formats won\'t be merged to prevent data corruption.')
elif not merger.available:
self.report_warning(
'You have requested merging of multiple formats but ffmpeg is not installed. '
'The formats won\'t be merged.')
def compatible_formats(formats):
# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
@ -2346,7 +2352,7 @@ def correct_ext(filename):
downloaded.append(fname)
partial_success, real_download = dl(fname, new_info)
success = success and partial_success
if merger.available:
if merger.available and not self.params.get('allow_unplayable_formats'):
info_dict['__postprocessors'].append(merger)
info_dict['__files_to_merge'] = downloaded
# Even if there were no downloads, it is being merged only now

View file

@ -212,9 +212,6 @@ def parse_retries(retries):
if opts.recodevideo is not None:
if opts.recodevideo not in REMUX_EXTENSIONS:
parser.error('invalid video recode format specified')
if opts.remuxvideo and opts.recodevideo:
opts.remuxvideo = None
write_string('WARNING: --remux-video is ignored since --recode-video was given\n', out=sys.stderr)
if opts.remuxvideo is not None:
opts.remuxvideo = opts.remuxvideo.replace(' ', '')
remux_regex = r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(REMUX_EXTENSIONS))
@ -265,6 +262,40 @@ def parse_retries(retries):
any_printing = opts.print_json
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
def report_conflict(arg1, arg2):
write_string('WARNING: %s is ignored since %s was given\n' % (arg2, arg1), out=sys.stderr)
if opts.remuxvideo and opts.recodevideo:
report_conflict('--recode-video', '--remux-video')
opts.remuxvideo = False
if opts.allow_unplayable_formats:
if opts.extractaudio:
report_conflict('--allow-unplayable-formats', '--extract-audio')
opts.extractaudio = False
if opts.remuxvideo:
report_conflict('--allow-unplayable-formats', '--remux-video')
opts.remuxvideo = False
if opts.recodevideo:
report_conflict('--allow-unplayable-formats', '--recode-video')
opts.recodevideo = False
if opts.addmetadata:
report_conflict('--allow-unplayable-formats', '--add-metadata')
opts.addmetadata = False
if opts.embedsubtitles:
report_conflict('--allow-unplayable-formats', '--embed-subs')
opts.embedsubtitles = False
if opts.embedthumbnail:
report_conflict('--allow-unplayable-formats', '--embed-thumbnail')
opts.embedthumbnail = False
if opts.xattrs:
report_conflict('--allow-unplayable-formats', '--xattrs')
opts.xattrs = False
if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'):
report_conflict('--allow-unplayable-formats', '--fixup')
opts.fixup = 'never'
if opts.sponskrub:
report_conflict('--allow-unplayable-formats', '--sponskrub')
opts.sponskrub = False
# PostProcessors
postprocessors = []
if opts.metafromfield:
@ -393,6 +424,7 @@ def parse_retries(retries):
'simulate': opts.simulate or any_getting,
'skip_download': opts.skip_download,
'format': opts.format,
'allow_unplayable_formats': opts.allow_unplayable_formats,
'format_sort': opts.format_sort,
'format_sort_force': opts.format_sort_force,
'allow_multiple_video_streams': opts.allow_multiple_video_streams,

View file

@ -267,13 +267,14 @@ def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ doc.findall(_add_ns('drmAdditionalHeaderSet'))):
# If id attribute is missing it's valid for all media nodes
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
self.report_error('Missing ID in f4m DRM')
media = remove_encrypted_media(media)
if not self.params.get('allow_unplayable_formats'):
for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ doc.findall(_add_ns('drmAdditionalHeaderSet'))):
# If id attribute is missing it's valid for all media nodes
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
self.report_error('Missing ID in f4m DRM')
media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media

View file

@ -29,9 +29,8 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative'
@staticmethod
def can_download(manifest, info_dict):
UNSUPPORTED_FEATURES = (
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
def can_download(manifest, info_dict, allow_unplayable_formats=False):
UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# Live streams heuristic does not always work (e.g. geo restricted to Germany
@ -50,7 +49,11 @@ def can_download(manifest, info_dict):
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
)
]
if not allow_unplayable_formats:
UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
]
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
check_results.append(can_decrypt_frag or not is_aes128_enc)
@ -66,7 +69,7 @@ def real_download(self, filename, info_dict):
man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
self.report_error('pycrypto not found. Please install it.')
return False

View file

@ -479,10 +479,10 @@ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
ext = mimetype2ext(source.get('type'))
src = source.get('src')
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
if container == 'WVM' or source.get('key_systems'):
if not self._downloader.params.get('allow_unplayable_formats') and (container == 'WVM' or source.get('key_systems')):
num_drm_sources += 1
continue
elif ext == 'ism':
elif ext == 'ism' and self._downloader.params.get('allow_unplayable_formats'):
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
@ -546,7 +546,7 @@ def build_format_id(kind):
error = errors[0]
raise ExtractorError(
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
if sources and num_drm_sources == len(sources):
if not self._downloader.params.get('allow_unplayable_formats') and sources and num_drm_sources == len(sources):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)

View file

@ -147,7 +147,7 @@ def _real_extract(self, url):
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
if 'drmOnly=true' in stream_url:
if not self._downloader.params.get('allow_unplayable_formats') and 'drmOnly=true' in stream_url:
continue
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(

View file

@ -2358,6 +2358,8 @@ def extract_Initialization(source):
extract_Initialization(segment_template)
return ms_info
allow_unplayable_formats = self._downloader.params.get('allow_unplayable_formats')
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
@ -2367,11 +2369,11 @@ def extract_Initialization(source):
'timescale': 1,
})
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
if is_drm_protected(adaptation_set):
if is_drm_protected(adaptation_set) and allow_unplayable_formats is False:
continue
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')):
if is_drm_protected(representation):
if is_drm_protected(representation) and allow_unplayable_formats is False:
continue
representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib)
@ -2585,7 +2587,7 @@ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
1. [MS-SSTR]: Smooth Streaming Protocol,
https://msdn.microsoft.com/en-us/library/ff469518.aspx
"""
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
if ism_doc.get('IsLive') == 'TRUE' or (ism_doc.find('Protection') is not None and not self._downloader.params.get('allow_unplayable_formats')):
return []
duration = int(ism_doc.attrib['Duration'])

View file

@ -103,7 +103,7 @@ def _real_extract(self, url):
formats = []
for e in media['MediaURLs']:
if e.get('UseDRM') is True:
if not self._downloader.params.get('allow_unplayable_formats') and e.get('UseDRM') is True:
continue
format_url = url_or_none(e.get('Path'))
if not format_url:

View file

@ -96,7 +96,7 @@ def _real_extract(self, url):
video = self._download_json(
'http://api.globovideos.com/videos/%s/playlist' % video_id,
video_id)['videos'][0]
if video.get('encrypted') is True:
if not self._downloader.params.get('allow_unplayable_formats') and video.get('encrypted') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
title = video['title']

View file

@ -141,7 +141,7 @@ def _real_extract(self, url):
title = video_data['title']
if video_data.get('drmProtected'):
if not self._downloader.params.get('allow_unplayable_formats') and video_data.get('drmProtected'):
raise ExtractorError('This video is DRM protected.', expected=True)
headers = {'Referer': url}

View file

@ -163,7 +163,7 @@ def _real_extract(self, url):
for f in result.get('files', []):
f_url = f.get('url')
content_format = f.get('content_format')
if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
if not f_url or (not self._downloader.params.get('allow_unplayable_formats') and ('-MDRM-' in content_format or '-FPS-' in content_format)):
continue
formats.append({
'url': f_url,

View file

@ -309,7 +309,7 @@ def sign_url(unsigned_url):
if f.get('fileExt') == 'chun':
continue
# DRM-protected video, cannot be decrypted
if f.get('fileExt') == 'wvm':
if not self._downloader.params.get('allow_unplayable_formats') and f.get('fileExt') == 'wvm':
continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt

View file

@ -96,7 +96,7 @@ def _extract_info(self, pc, mobile, i, referer):
urls = []
for stream in pc_item.get('streams', []):
stream_url = stream.get('url')
if not stream_url or stream.get('drmProtected') or stream_url in urls:
if not stream_url or (not self._downloader.params.get('allow_unplayable_formats') and stream.get('drmProtected')) or stream_url in urls:
continue
urls.append(stream_url)
ext = determine_ext(stream_url)

View file

@ -36,7 +36,7 @@ def _real_extract(self, url):
'$include': '[HasClosedCaptions]',
})
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
if not self._downloader.params.get('allow_unplayable_formats') and try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
raise ExtractorError('This video is DRM protected.', expected=True)
manifest_base_url = content_package_url + 'manifest.'

View file

@ -66,7 +66,7 @@ def _real_extract(self, url):
video_data = common_data['video']
if video_data.get('drm'):
if not self._downloader.params.get('allow_unplayable_formats') and video_data.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']

View file

@ -246,7 +246,7 @@ def _get_info(self, url, video_id):
})
if not formats:
if drm:
if not self._downloader.params.get('allow_unplayable_formats') and drm:
raise ExtractorError('This video is DRM protected.', expected=True)
return

View file

@ -34,7 +34,7 @@ def _extract_video_info(self, url, clip_id):
'ids': clip_id,
})[0]
if video.get('is_protected') is True:
if not self._downloader.params.get('allow_unplayable_formats') and video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
formats = []

View file

@ -125,7 +125,7 @@ def _real_extract(self, url):
})
mpd_url = data.get('urlDash')
if not data.get('drm') and mpd_url:
if (not self._downloader.params.get('allow_unplayable_formats') and not data.get('drm')) and mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url, media_id, mpd_id='dash', fatal=False))

View file

@ -201,7 +201,7 @@ def pv(name):
if not formats:
drm = xpath_text(video_xml, './Clip/DRM', default=None)
if drm:
if not self._downloader.params.get('allow_unplayable_formats') and drm:
raise ExtractorError('This video is DRM protected.', expected=True)
ns_st_cds = pv('ns_st_cds')
if ns_st_cds != 'free':

View file

@ -111,7 +111,7 @@ def _real_extract(self, url):
playout = self._call_api(
'playout/url/' + video_id, video_id)['playout']
if playout.get('drm'):
if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')

View file

@ -75,7 +75,7 @@ def _real_extract(self, url):
video_id = self._match_id(url)
content = self._call_api(
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
if content.get('isEncrypted'):
if not self._downloader.params.get('allow_unplayable_formats') and content.get('isEncrypted'):
raise ExtractorError('This video is DRM protected.', expected=True)
dash_url = content['videoURL']
headers = {

View file

@ -154,7 +154,7 @@ def _real_extract(self, url):
})
if not formats:
for meta in (info.get('Metas') or []):
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
if not self._downloader.params.get('allow_unplayable_formats') and meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
raise ExtractorError(
'This video is DRM protected.', expected=True)
# Most likely because geo-blocked

View file

@ -74,7 +74,7 @@ def _real_extract(self, url):
})
# IsDrm does not necessarily mean the video is DRM protected (see
# https://github.com/ytdl-org/youtube-dl/issues/13994).
if metadata.get('IsDrm'):
if not self._downloader.params.get('allow_unplayable_formats') and metadata.get('IsDrm'):
self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']

View file

@ -69,7 +69,7 @@ def make_urls(proto, suffix):
if formats:
break
else:
if info.get('isDrm'):
if not self._downloader.params.get('allow_unplayable_formats') and info.get('isDrm'):
raise ExtractorError(
'Video %s is DRM protected' % video_id, expected=True)
if info.get('geoblocked'):

View file

@ -315,7 +315,7 @@ def add_format(format_id, format_dict, protocol='http'):
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
for f in m3u8_formats:
if '_drm/index_' in f['url']:
if not self._downloader.params.get('allow_unplayable_formats') and '_drm/index_' in f['url']:
continue
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None

View file

@ -45,7 +45,7 @@ def _real_extract(self, url):
encryption = self._search_regex(
r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
m3u8_url, 'encryption', default=None)
if encryption and encryption in ('cenc', 'cbcs-aapl'):
if not self._downloader.params.get('allow_unplayable_formats') and encryption and encryption in ('cenc', 'cbcs-aapl'):
raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(

View file

@ -1618,7 +1618,7 @@ def feed_entry(name):
formats.append(f)
if not formats:
if streaming_data.get('licenseInfos'):
if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
raise ExtractorError(
'This video is DRM protected.', expected=True)
pemr = try_get(

View file

@ -519,6 +519,16 @@ def _dict_from_multiple_values_options_callback(
'If a merge is required (e.g. bestvideo+bestaudio), '
'output to given container format. One of mkv, mp4, ogg, webm, flv. '
'Ignored if no merge is required'))
video_format.add_option(
'--allow-unplayable-formats',
action='store_true', dest='allow_unplayable_formats', default=False,
help=(
'Allow unplayable formats to be listed and downloaded. '
'All video postprocessing will also be turned off'))
video_format.add_option(
'--no-allow-unplayable-formats',
action='store_false', dest='allow_unplayable_formats',
help='Do not allow unplayable formats to be listed or downloaded (default)')
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
subtitles.add_option(