Update to ytdl-2021.02.10

Except: [archiveorg] Fix and improve extraction (5fc53690cbe6abb11941a3f4846b566a7472753e)
This commit is contained in:
pukkandan 2021-02-11 02:52:55 +05:30
parent 539d158c50
commit cc2db87805
8 changed files with 345 additions and 237 deletions

View file

@ -1,6 +1,5 @@
# Supported sites # Supported sites
- **1tv**: Первый канал - **1tv**: Первый канал
- **1up.com**
- **20min** - **20min**
- **220.ro** - **220.ro**
- **23video** - **23video**
@ -394,6 +393,8 @@ # Supported sites
- **HungamaSong** - **HungamaSong**
- **Hypem** - **Hypem**
- **ign.com** - **ign.com**
- **IGNArticle**
- **IGNVideo**
- **IHeartRadio** - **IHeartRadio**
- **iheartradio:podcast** - **iheartradio:podcast**
- **imdb**: Internet Movie Database trailers - **imdb**: Internet Movie Database trailers
@ -701,7 +702,6 @@ # Supported sites
- **parliamentlive.tv**: UK parliament videos - **parliamentlive.tv**: UK parliament videos
- **Patreon** - **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pbs**: Public 
Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag**
- **PearVideo** - **PearVideo**
- **PeerTube** - **PeerTube**
- **People** - **People**

View file

@ -19,55 +19,46 @@
_TESTS = [ _TESTS = [
( (
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
'js',
86, 86,
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
'js',
85, 85,
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
'js',
90, 90,
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
'js',
84, 84,
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
'js',
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
'js',
84, 84,
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
'js',
83, 83,
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
'js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
) )
@ -78,6 +69,10 @@ class TestPlayerInfo(unittest.TestCase):
def test_youtube_extract_player_info(self): def test_youtube_extract_player_info(self):
PLAYER_URLS = ( PLAYER_URLS = (
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
# obsolete # obsolete
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@ -100,13 +95,13 @@ def setUp(self):
os.mkdir(self.TESTDATA_DIR) os.mkdir(self.TESTDATA_DIR)
def make_tfunc(url, stype, sig_input, expected_sig): def make_tfunc(url, sig_input, expected_sig):
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
assert m, '%r should follow URL format' % url assert m, '%r should follow URL format' % url
test_id = m.group(1) test_id = m.group(1)
def test_func(self): def test_func(self):
basename = 'player-%s.%s' % (test_id, stype) basename = 'player-%s.js' % test_id
fn = os.path.join(self.TESTDATA_DIR, basename) fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn): if not os.path.exists(fn):
@ -114,22 +109,16 @@ def test_func(self):
ydl = FakeYDL() ydl = FakeYDL()
ie = YoutubeIE(ydl) ie = YoutubeIE(ydl)
if stype == 'js': with io.open(fn, encoding='utf-8') as testf:
with io.open(fn, encoding='utf-8') as testf: jscode = testf.read()
jscode = testf.read() func = ie._parse_sig_js(jscode)
func = ie._parse_sig_js(jscode)
else:
assert stype == 'swf'
with open(fn, 'rb') as testf:
swfcode = testf.read()
func = ie._parse_sig_swf(swfcode)
src_sig = ( src_sig = (
compat_str(string.printable[:sig_input]) compat_str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input) if isinstance(sig_input, int) else sig_input)
got_sig = func(src_sig) got_sig = func(src_sig)
self.assertEqual(got_sig, expected_sig) self.assertEqual(got_sig, expected_sig)
test_func.__name__ = str('test_signature_' + stype + '_' + test_id) test_func.__name__ = str('test_signature_js_' + test_id)
setattr(TestSignature, test_func.__name__, test_func) setattr(TestSignature, test_func.__name__, test_func)

View file

@ -95,6 +95,9 @@ def _real_extract(self, url):
if 'Ten film jest dostępny dla użytkowników premium' in webpage: if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True) raise ExtractorError('This video is only available for premium users.', expected=True)
if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
self.raise_geo_restricted()
need_confirm_age = False need_confirm_age = False
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")', if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
webpage, 'birthday validate form', default=None): webpage, 'birthday validate form', default=None):

View file

@ -502,8 +502,8 @@
from .hypem import HypemIE from .hypem import HypemIE
from .ign import ( from .ign import (
IGNIE, IGNIE,
OneUPIE, IGNVideoIE,
PCMagIE, IGNArticleIE,
) )
from .iheart import ( from .iheart import (
IHeartRadioIE, IHeartRadioIE,

View file

@ -3,230 +3,255 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
HEADRequest,
determine_ext,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
strip_or_none,
try_get,
) )
class IGNIE(InfoExtractor): class IGNBaseIE(InfoExtractor):
def _call_api(self, slug):
return self._download_json(
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
class IGNIE(IGNBaseIE):
""" """
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com. Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_PAGE_TYPE = 'video'
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s' _TESTS = [{
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']' 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
'md5': 'd2e1586d9987d40fad7867bf96a018ea',
_TESTS = [ 'info_dict': {
{ 'id': '8f862beef863986b2785559b9e1aa599',
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', 'ext': 'mp4',
'md5': 'febda82c4bafecd2d44b6e1a18a595f8', 'title': 'The Last of Us Review',
'info_dict': { 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
'id': '8f862beef863986b2785559b9e1aa599', 'timestamp': 1370440800,
'ext': 'mp4', 'upload_date': '20130605',
'title': 'The Last of Us Review', 'tags': 'count:9',
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', }
'timestamp': 1370440800, }, {
'upload_date': '20130605', 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'uploader_id': 'cberidon@ign.com', 'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
} 'info_dict': {
}, 'id': 'ee10d774b508c9b8ec07e763b9125b91',
{ 'ext': 'mp4',
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', 'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
'info_dict': { 'description': 'md5:817a20299de610bd56f13175386da6fa',
'id': '100-little-things-in-gta-5-that-will-blow-your-mind', 'timestamp': 1420571160,
}, 'upload_date': '20150106',
'playlist': [ 'tags': 'count:4',
{ }
'info_dict': { }, {
'id': '5ebbd138523268b93c9141af17bec937', 'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
'ext': 'mp4', 'only_matching': True,
'title': 'GTA 5 Video Review', }]
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
'uploader_id': 'danieljkrupa@gmail.com',
},
},
{
'info_dict': {
'id': '638672ee848ae4ff108df2a296418ee2',
'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
'uploader_id': 'togilvie@ign.com',
},
},
],
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'md5': '618fedb9c901fd086f6f093564ef8558',
'info_dict': {
'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
'timestamp': 1408047180,
'upload_date': '20140814',
'uploader_id': 'jamesduggan1990@gmail.com',
},
},
{
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'only_matching': True,
},
{
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
},
{
# videoId pattern
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
'only_matching': True,
},
]
def _find_video_id(self, webpage):
res_id = [
r'"video_id"\s*:\s*"(.*?)"',
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
r'data-video-id="(.+?)"',
r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
]
return self._search_regex(res_id, webpage, 'video id', default=None)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
name_or_id = mobj.group('name_or_id') video = self._call_api(display_id)
page_type = mobj.group('type') video_id = video['videoId']
webpage = self._download_webpage(url, name_or_id) metadata = video['metadata']
if page_type != 'video': title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
multiple_urls = re.findall(
r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
webpage)
if multiple_urls:
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
return {
'_type': 'playlist',
'id': name_or_id,
'entries': entries,
}
video_id = self._find_video_id(webpage)
if not video_id:
return self.url_result(self._search_regex(
self._EMBED_RE, webpage, 'embed url'))
return self._get_video_info(video_id)
def _get_video_info(self, video_id):
api_data = self._download_json(
self._API_URL_TEMPLATE % video_id, video_id)
formats = [] formats = []
m3u8_url = api_data['refs'].get('m3uUrl') refs = video.get('refs') or {}
m3u8_url = refs.get('m3uUrl')
if m3u8_url: if m3u8_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
f4m_url = api_data['refs'].get('f4mUrl')
f4m_url = refs.get('f4mUrl')
if f4m_url: if f4m_url:
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False)) f4m_url, video_id, f4m_id='hds', fatal=False))
for asset in api_data['assets']:
for asset in (video.get('assets') or []):
asset_url = asset.get('url')
if not asset_url:
continue
formats.append({ formats.append({
'url': asset['url'], 'url': asset_url,
'tbr': asset.get('actual_bitrate_kbps'), 'tbr': int_or_none(asset.get('bitrate'), 1000),
'fps': asset.get('frame_rate'), 'fps': int_or_none(asset.get('frame_rate')),
'height': int_or_none(asset.get('height')), 'height': int_or_none(asset.get('height')),
'width': int_or_none(asset.get('width')), 'width': int_or_none(asset.get('width')),
}) })
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
if mezzanine_url:
formats.append({
'ext': determine_ext(mezzanine_url, 'mp4'),
'format_id': 'mezzanine',
'preference': 1,
'url': mezzanine_url,
})
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [{ thumbnails = []
'url': thumbnail['url'] for thumbnail in (video.get('thumbnails') or []):
} for thumbnail in api_data.get('thumbnails', [])] thumbnail_url = thumbnail.get('url')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
})
metadata = api_data['metadata'] tags = []
for tag in (video.get('tags') or []):
display_name = tag.get('displayName')
if not display_name:
continue
tags.append(display_name)
return { return {
'id': api_data.get('videoId') or video_id, 'id': video_id,
'title': metadata.get('longTitle') or metadata.get('name') or metadata.get['title'], 'title': title,
'description': metadata.get('description'), 'description': strip_or_none(metadata.get('description')),
'timestamp': parse_iso8601(metadata.get('publishDate')), 'timestamp': parse_iso8601(metadata.get('publishDate')),
'duration': int_or_none(metadata.get('duration')), 'duration': int_or_none(metadata.get('duration')),
'display_id': metadata.get('slug') or video_id, 'display_id': display_id,
'uploader_id': metadata.get('creator'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': formats,
'tags': tags,
} }
class OneUPIE(IGNIE): class IGNVideoIE(InfoExtractor):
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
IE_NAME = '1up.com'
_TESTS = [{ _TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976.html', 'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'md5': 'c9cc69e07acb675c31a16719f909e347', 'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
'info_dict': { 'info_dict': {
'id': '34976', 'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sniper Elite V2 - Trailer', 'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826', 'description': 'Taking out assassination targets in Hitman has never been more stylish.',
'timestamp': 1313099220, 'timestamp': 1444665600,
'upload_date': '20110811', 'upload_date': '20151012',
'uploader_id': 'IGN',
} }
}, {
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
}, {
# Youtube embed
'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
'only_matching': True,
}, {
# Twitter embed
'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
'only_matching': True,
}, {
# Vimeo embed
'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
result = super(OneUPIE, self)._real_extract(url) req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
result['id'] = mobj.group('name_or_id') url = self._request_webpage(req, video_id).geturl()
return result ign_url = compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
if ign_url:
return self.url_result(ign_url, IGNIE.ie_key())
return self.url_result(url)
class PCMagIE(IGNIE): class IGNArticleIE(IGNBaseIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
IE_NAME = 'pcmag' _PAGE_TYPE = 'article'
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{ _TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
'md5': '212d6154fd0361a2781075f1febbe9ad',
'info_dict': { 'info_dict': {
'id': 'ee10d774b508c9b8ec07e763b9125b91', 'id': '524497489e4e8ff5848ece34',
'ext': 'mp4', 'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?', },
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3', 'playlist': [
'timestamp': 1420571160, {
'upload_date': '20150106', 'info_dict': {
'uploader_id': 'cozzipix@gmail.com', 'id': '5ebbd138523268b93c9141af17bec937',
} 'ext': 'mp4',
'title': 'GTA 5 Video Review',
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
},
},
{
'info_dict': {
'id': '638672ee848ae4ff108df2a296418ee2',
'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
},
},
],
'params': {
'playlist_items': '2-3',
'skip_download': True,
},
}, { }, {
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp', 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'md5': '94130c1ca07ba0adb6088350681f16c1',
'info_dict': { 'info_dict': {
'id': '042e560ba94823d43afcb12ddf7142ca', 'id': '53ee806780a81ec46e0790f8',
'ext': 'mp4', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'title': 'HTC\'s Weird New Re Camera - What\'s New Now', },
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908', 'playlist_count': 2,
'timestamp': 1412953920, }, {
'upload_date': '20141010', # videoId pattern
'uploader_id': 'chris_snyder@pcmag.com', 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
} 'only_matching': True,
}, {
# Youtube embed
'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
'only_matching': True,
}, {
# IMDB embed
'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
'only_matching': True,
}, {
# Facebook embed
'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
'only_matching': True,
}, {
# Brightcove embed
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
'only_matching': True,
}] }]
def _real_extract(self, url):
display_id = self._match_id(url)
article = self._call_api(display_id)
def entries():
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
if media_url:
yield self.url_result(media_url, IGNIE.ie_key())
for content in (article.get('content') or []):
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
yield self.url_result(video_url)
return self.playlist_result(
entries(), article.get('articleId'),
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))

View file

@ -42,8 +42,8 @@ def _real_extract(self, url):
url = url.replace('skola.se/Produkter', 'play.se/program') url = url.replace('skola.se/Produkter', 'play.se/program')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._html_search_regex( urplayer_data = self._parse_json(self._html_search_regex(
r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"', r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
webpage, 'urplayer data'), video_id)['currentProduct'] webpage, 'urplayer data'), video_id)['accessibleEpisodes'][0]
episode = urplayer_data['title'] episode = urplayer_data['title']
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']

View file

@ -11,11 +11,14 @@
dict_get, dict_get,
extract_attributes, extract_attributes,
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
parse_duration, parse_duration,
str_or_none,
try_get, try_get,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin,
) )
@ -146,36 +149,89 @@ def get_height(s):
video = initials['videoModel'] video = initials['videoModel']
title = video['title'] title = video['title']
formats = [] formats = []
for format_id, formats_dict in video['sources'].items(): format_urls = set()
format_sizes = {}
sources = try_get(video, lambda x: x['sources'], dict) or {}
for format_id, formats_dict in sources.items():
if not isinstance(formats_dict, dict): if not isinstance(formats_dict, dict):
continue continue
download_sources = try_get(sources, lambda x: x['download'], dict) or {}
for quality, format_dict in download_sources.items():
if not isinstance(format_dict, dict):
continue
format_sizes[quality] = float_or_none(format_dict.get('size'))
for quality, format_item in formats_dict.items(): for quality, format_item in formats_dict.items():
if format_id == 'download': if format_id == 'download':
# Download link takes some time to be generated, # Download link takes some time to be generated,
# skipping for now # skipping for now
continue continue
if not isinstance(format_item, dict): format_url = format_item
continue
format_url = format_item.get('link')
filesize = int_or_none(
format_item.get('size'), invscale=1000000)
else:
format_url = format_item
filesize = None
format_url = url_or_none(format_url) format_url = url_or_none(format_url)
if not format_url: if not format_url or format_url in format_urls:
continue continue
format_urls.add(format_url)
formats.append({ formats.append({
'format_id': '%s-%s' % (format_id, quality), 'format_id': '%s-%s' % (format_id, quality),
'url': format_url, 'url': format_url,
'ext': determine_ext(format_url, 'mp4'), 'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality), 'height': get_height(quality),
'filesize': filesize, 'filesize': format_sizes.get(quality),
'http_headers': { 'http_headers': {
'Referer': urlh.geturl(), 'Referer': urlh.geturl(),
}, },
}) })
self._sort_formats(formats) xplayer_sources = try_get(
initials, lambda x: x['xplayerSettings']['sources'], dict)
if xplayer_sources:
hls_sources = xplayer_sources.get('hls')
if isinstance(hls_sources, dict):
for hls_format_key in ('url', 'fallback'):
hls_url = hls_sources.get(hls_format_key)
if not hls_url:
continue
hls_url = urljoin(url, hls_url)
if not hls_url or hls_url in format_urls:
continue
format_urls.add(hls_url)
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
standard_sources = xplayer_sources.get('standard')
if isinstance(standard_sources, dict):
for format_id, formats_list in standard_sources.items():
if not isinstance(formats_list, list):
continue
for standard_format in formats_list:
if not isinstance(standard_format, dict):
continue
for standard_format_key in ('url', 'fallback'):
standard_url = standard_format.get(standard_format_key)
if not standard_url:
continue
standard_url = urljoin(url, standard_url)
if not standard_url or standard_url in format_urls:
continue
format_urls.add(standard_url)
ext = determine_ext(standard_url, 'mp4')
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
quality = (str_or_none(standard_format.get('quality'))
or str_or_none(standard_format.get('label'))
or '')
formats.append({
'format_id': '%s-%s' % (format_id, quality),
'url': standard_url,
'ext': ext,
'height': get_height(quality),
'filesize': format_sizes.get(quality),
'http_headers': {
'Referer': standard_url,
},
})
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
categories_list = video.get('categories') categories_list = video.get('categories')
if isinstance(categories_list, list): if isinstance(categories_list, list):

View file

@ -32,7 +32,7 @@
mimetype2ext, mimetype2ext,
parse_codecs, parse_codecs,
parse_duration, parse_duration,
# qualities, # qualities, # TODO: Enable this after fixing formatSort
remove_start, remove_start,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
@ -414,7 +414,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?(1).+)? # if we found the ID, everything can follow (?(1).+)? # if we found the ID, everything can follow
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.js$', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
_formats = { _formats = {
@ -621,6 +622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'AfrojackVEVO', 'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011', 'upload_date': '20131011',
'abr': 129.495,
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
@ -1134,10 +1136,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}, },
{ {
# Age-gated video only available with authentication (unavailable # https://github.com/ytdl-org/youtube-dl/pull/28094
# via embed page workaround) 'url': 'OtqTfy26tG0',
'url': 'XgnwCQzjau8', 'info_dict': {
'only_matching': True, 'id': 'OtqTfy26tG0',
'ext': 'mp4',
'title': 'Burn Out',
'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
'upload_date': '20141120',
'uploader': 'The Cinematic Orchestra - Topic',
'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
'artist': 'The Cinematic Orchestra',
'track': 'Burn Out',
'album': 'Every Day',
'release_data': None,
'release_year': None,
},
'params': {
'skip_download': True,
},
}, },
] ]
@ -1230,6 +1248,9 @@ def _parse_sig_js(self, jscode):
funcname = self._search_regex( funcname = self._search_regex(
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns # Obsolete patterns
@ -1493,7 +1514,9 @@ def feed_entry(name):
formats = [] formats = []
itags = [] itags = []
itag_qualities = {}
player_url = None player_url = None
# TODO: Enable this after fixing formatSort
# q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) # q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
streaming_data = player_response.get('streamingData') or {} streaming_data = player_response.get('streamingData') or {}
streaming_formats = streaming_data.get('formats') or [] streaming_formats = streaming_data.get('formats') or []
@ -1502,6 +1525,16 @@ def feed_entry(name):
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
continue continue
itag = str_or_none(fmt.get('itag'))
quality = fmt.get('quality')
if itag and quality:
itag_qualities[itag] = quality
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragments that would subsequently be requested (with `&sq=N`)
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
continue
fmt_url = fmt.get('url') fmt_url = fmt.get('url')
if not fmt_url: if not fmt_url:
sc = compat_parse_qs(fmt.get('signatureCipher')) sc = compat_parse_qs(fmt.get('signatureCipher'))
@ -1521,10 +1554,10 @@ def feed_entry(name):
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
fmt_url += '&' + sp + '=' + signature fmt_url += '&' + sp + '=' + signature
itag = str_or_none(fmt.get('itag'))
if itag: if itag:
itags.append(itag) itags.append(itag)
quality = fmt.get('quality') tbr = float_or_none(
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
dct = { dct = {
'asr': int_or_none(fmt.get('audioSampleRate')), 'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')), 'filesize': int_or_none(fmt.get('contentLength')),
@ -1532,9 +1565,8 @@ def feed_entry(name):
'format_note': fmt.get('qualityLabel') or quality, 'format_note': fmt.get('qualityLabel') or quality,
'fps': int_or_none(fmt.get('fps')), 'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')), 'height': int_or_none(fmt.get('height')),
# 'quality': q(quality), # This does not correctly reflect the overall quality of the format # 'quality': q(quality), # TODO: Enable this after fixing formatSort
'tbr': float_or_none(fmt.get( 'tbr': tbr,
'averageBitrate') or fmt.get('bitrate'), 1000),
'url': fmt_url, 'url': fmt_url,
'width': fmt.get('width'), 'width': fmt.get('width'),
} }
@ -1545,7 +1577,13 @@ def feed_entry(name):
if mobj: if mobj:
dct['ext'] = mimetype2ext(mobj.group(1)) dct['ext'] = mimetype2ext(mobj.group(1))
dct.update(parse_codecs(mobj.group(2))) dct.update(parse_codecs(mobj.group(2)))
if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none': no_audio = dct.get('acodec') == 'none'
no_video = dct.get('vcodec') == 'none'
if no_audio:
dct['vbr'] = tbr
if no_video:
dct['abr'] = tbr
if no_audio or no_video:
dct['downloader_options'] = { dct['downloader_options'] = {
# Youtube throttles chunks >~10M # Youtube throttles chunks >~10M
'http_chunk_size': 10485760, 'http_chunk_size': 10485760,
@ -1565,22 +1603,19 @@ def feed_entry(name):
if self._downloader.params.get('youtube_include_dash_manifest'): if self._downloader.params.get('youtube_include_dash_manifest'):
dash_manifest_url = streaming_data.get('dashManifestUrl') dash_manifest_url = streaming_data.get('dashManifestUrl')
if dash_manifest_url: if dash_manifest_url:
dash_formats = []
for f in self._extract_mpd_formats( for f in self._extract_mpd_formats(
dash_manifest_url, video_id, fatal=False): dash_manifest_url, video_id, fatal=False):
itag = f['format_id']
if itag in itags:
continue
# if itag in itag_qualities: # TODO: Enable this after fixing formatSort
# f['quality'] = q(itag_qualities[itag])
filesize = int_or_none(self._search_regex( filesize = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None)) or f['url'], 'file size', default=None))
if filesize: if filesize:
f['filesize'] = filesize f['filesize'] = filesize
dash_formats.append(f) formats.append(f)
# Until further investigation prefer DASH formats as non-DASH
# may not be available (see [1])
# 1. https://github.com/ytdl-org/youtube-dl/issues/28070
if dash_formats:
dash_formats_keys = [f['format_id'] for f in dash_formats]
formats = [f for f in formats if f['format_id'] not in dash_formats_keys]
formats.extend(dash_formats)
if not formats: if not formats:
if streaming_data.get('licenseInfos'): if streaming_data.get('licenseInfos'):
@ -1747,7 +1782,7 @@ def process_language(container, base_url, lang_code, query):
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int(release_year), 'release_year': int_or_none(release_year),
}) })
initial_data = None initial_data = None
@ -2597,9 +2632,9 @@ def _extract_continuation(cls, renderer):
next_continuation = cls._extract_next_continuation_data(renderer) next_continuation = cls._extract_next_continuation_data(renderer)
if next_continuation: if next_continuation:
return next_continuation return next_continuation
contents = renderer.get('contents') or renderer.get('items') contents = []
if not isinstance(contents, list): for key in ('contents', 'items'):
return contents.extend(try_get(renderer, lambda x: x[key], list) or [])
for content in contents: for content in contents:
if not isinstance(content, dict): if not isinstance(content, dict):
continue continue