From 9b12e9a573c177a7f2e91a383822ad4aa17a99b1 Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 9 Nov 2021 22:07:52 +0100 Subject: [PATCH] [la7] Fix extractor (#1575) Closes #1065 Authored by: nixxo --- yt_dlp/extractor/la7.py | 54 +++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index 363fbd6a5..de985e450 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -7,8 +7,9 @@ from ..utils import ( determine_ext, float_or_none, + HEADRequest, + int_or_none, parse_duration, - smuggle_url, unified_strdate, ) @@ -25,19 +26,38 @@ class LA7IE(InfoExtractor): 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'info_dict': { - 'id': '0_42j6wd36', + 'id': 'inccool8-02-10-2015-163722', 'ext': 'mp4', 'title': 'Inc.Cool8', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'thumbnail': 're:^https?://.*', - 'uploader_id': 'kdla7pillole@iltrovatore.it', - 'timestamp': 1443814869, 'upload_date': '20151002', }, }, { 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', 'only_matching': True, }] + _HOST = 'https://awsvodpkg.iltrovatore.it' + + def _generate_mp4_url(self, quality, m3u8_formats): + for f in m3u8_formats: + if f['vcodec'] != 'none' and quality in f['url']: + http_url = '%s%s.mp4' % (self._HOST, quality) + + urlh = self._request_webpage( + HEADRequest(http_url), quality, + note='Check filesize', fatal=False) + if urlh: + http_f = f.copy() + del http_f['manifest_url'] + http_f.update({ + 'format_id': http_f['format_id'].replace('hls-', 'https-'), + 'url': http_url, + 'protocol': 'https', + 'filesize_approx': int_or_none(urlh.headers.get('Content-Length', None)), + }) + return http_f + return None def _real_extract(self, url): video_id = self._match_id(url) @@ -46,22 +66,30 @@ def _real_extract(self, url): url = '%s//%s' % (self.http_scheme(), url) webpage = self._download_webpage(url, video_id) + video_path = self._search_regex(r'(/content/.*?).mp4', webpage, 'video_path') - player_data = self._search_regex( - [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'], - webpage, 'player data') - vid = self._search_regex(r'vid\s*:\s*"(.+?)",', player_data, 'vid') + formats = self._extract_mpd_formats( + f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd', + video_id, mpd_id='dash', fatal=False) + m3u8_formats = self._extract_m3u8_formats( + f'{self._HOST}/local/hls/,{video_path}.mp4.urlset/master.m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(m3u8_formats) + + for q in filter(None, video_path.split(',')): + http_f = self._generate_mp4_url(q, m3u8_formats) + if http_f: + formats.append(http_f) + + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'url': smuggle_url('kaltura:103:%s' % vid, { - 'service_url': 'http://nkdam.iltrovatore.it', - }), 'id': video_id, 'title': self._og_search_title(webpage, default=None), 'description': self._og_search_description(webpage, default=None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), - 'ie_key': 'Kaltura', + 'formats': formats, + 'upload_date': unified_strdate(self._search_regex(r'datetime="(.+?)"', webpage, 'upload_date', fatal=False)) }