mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 01:02:13 +01:00
Update to ytdl-commit-2dd6c6e
[YouTube] Avoid crash if uploader_id extraction fails
2dd6c6edd8
Except:
* 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing
* 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor
* 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p
This commit is contained in:
parent
a538772969
commit
45b2ee6f4f
19 changed files with 911 additions and 210 deletions
|
@ -76,7 +76,7 @@
|
||||||
|
|
||||||
# NEW FEATURES
|
# NEW FEATURES
|
||||||
|
|
||||||
* Merged with **youtube-dl v2021.12.17+ [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
* Merged with **youtube-dl v2021.12.17+ [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||||
|
|
||||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||||
|
|
||||||
|
|
|
@ -69,6 +69,7 @@ def test_opengraph(self):
|
||||||
<meta name="og:test1" content='foo > < bar'/>
|
<meta name="og:test1" content='foo > < bar'/>
|
||||||
<meta name="og:test2" content="foo >//< bar"/>
|
<meta name="og:test2" content="foo >//< bar"/>
|
||||||
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||||
|
<meta property=og:test4 content=unquoted-value/>
|
||||||
'''
|
'''
|
||||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
|
@ -81,6 +82,7 @@ def test_opengraph(self):
|
||||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||||
|
self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
|
||||||
|
|
||||||
def test_html_search_meta(self):
|
def test_html_search_meta(self):
|
||||||
ie = self.ie
|
ie = self.ie
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
|
|
||||||
from test.helper import is_download_test, try_rm
|
from test.helper import is_download_test, try_rm
|
||||||
from yt_dlp import YoutubeDL
|
from yt_dlp import YoutubeDL
|
||||||
|
from yt_dlp.utils import DownloadError
|
||||||
|
|
||||||
|
|
||||||
def _download_restricted(url, filename, age):
|
def _download_restricted(url, filename, age):
|
||||||
|
@ -25,10 +26,14 @@ def _download_restricted(url, filename, age):
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
|
try:
|
||||||
ydl.download([url])
|
ydl.download([url])
|
||||||
res = os.path.exists(json_filename)
|
except DownloadError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return os.path.exists(json_filename)
|
||||||
|
finally:
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
@is_download_test
|
@is_download_test
|
||||||
|
@ -38,12 +43,12 @@ def _assert_restricted(self, url, filename, age, old_age=None):
|
||||||
self.assertFalse(_download_restricted(url, filename, age))
|
self.assertFalse(_download_restricted(url, filename, age))
|
||||||
|
|
||||||
def test_youtube(self):
|
def test_youtube(self):
|
||||||
self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
|
self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
|
||||||
|
|
||||||
def test_youporn(self):
|
def test_youporn(self):
|
||||||
self._assert_restricted(
|
self._assert_restricted(
|
||||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
|
||||||
'505835.mp4', 2, old_age=25)
|
'16715086.mp4', 2, old_age=25)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
""" Do not use! """
|
""" Do not use! """
|
||||||
|
|
||||||
|
import base64
|
||||||
import collections
|
import collections
|
||||||
import ctypes
|
import ctypes
|
||||||
import getpass
|
import getpass
|
||||||
|
@ -29,6 +30,7 @@
|
||||||
from re import Pattern as compat_Pattern # noqa: F401
|
from re import Pattern as compat_Pattern # noqa: F401
|
||||||
from re import match as compat_Match # noqa: F401
|
from re import match as compat_Match # noqa: F401
|
||||||
|
|
||||||
|
from . import compat_expanduser, compat_HTMLParseError, compat_realpath
|
||||||
from .compat_utils import passthrough_module
|
from .compat_utils import passthrough_module
|
||||||
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
|
from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
|
||||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||||
|
@ -47,23 +49,25 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
env[key] = value
|
env[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
compat_base64_b64decode = base64.b64decode
|
||||||
compat_basestring = str
|
compat_basestring = str
|
||||||
compat_casefold = str.casefold
|
compat_casefold = str.casefold
|
||||||
compat_chr = chr
|
compat_chr = chr
|
||||||
compat_collections_abc = collections.abc
|
compat_collections_abc = collections.abc
|
||||||
compat_cookiejar = http.cookiejar
|
compat_cookiejar = compat_http_cookiejar = http.cookiejar
|
||||||
compat_cookiejar_Cookie = http.cookiejar.Cookie
|
compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie
|
||||||
compat_cookies = http.cookies
|
compat_cookies = compat_http_cookies = http.cookies
|
||||||
compat_cookies_SimpleCookie = http.cookies.SimpleCookie
|
compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie
|
||||||
compat_etree_Element = etree.Element
|
compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element
|
||||||
compat_etree_register_namespace = etree.register_namespace
|
compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace
|
||||||
compat_filter = filter
|
compat_filter = filter
|
||||||
compat_get_terminal_size = shutil.get_terminal_size
|
compat_get_terminal_size = shutil.get_terminal_size
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
compat_getpass = getpass.getpass
|
compat_getpass = compat_getpass_getpass = getpass.getpass
|
||||||
compat_html_entities = html.entities
|
compat_html_entities = html.entities
|
||||||
compat_html_entities_html5 = html.entities.html5
|
compat_html_entities_html5 = html.entities.html5
|
||||||
compat_HTMLParser = html.parser.HTMLParser
|
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||||
|
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||||
compat_http_client = http.client
|
compat_http_client = http.client
|
||||||
compat_http_server = http.server
|
compat_http_server = http.server
|
||||||
compat_input = input
|
compat_input = input
|
||||||
|
@ -72,6 +76,8 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
compat_kwargs = lambda kwargs: kwargs
|
compat_kwargs = lambda kwargs: kwargs
|
||||||
compat_map = map
|
compat_map = map
|
||||||
compat_numeric_types = (int, float, complex)
|
compat_numeric_types = (int, float, complex)
|
||||||
|
compat_os_path_expanduser = compat_expanduser
|
||||||
|
compat_os_path_realpath = compat_realpath
|
||||||
compat_print = print
|
compat_print = print
|
||||||
compat_shlex_split = shlex.split
|
compat_shlex_split = shlex.split
|
||||||
compat_socket_create_connection = socket.create_connection
|
compat_socket_create_connection = socket.create_connection
|
||||||
|
@ -81,7 +87,9 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||||
compat_tokenize_tokenize = tokenize.tokenize
|
compat_tokenize_tokenize = tokenize.tokenize
|
||||||
compat_urllib_error = urllib.error
|
compat_urllib_error = urllib.error
|
||||||
|
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||||
compat_urllib_parse = urllib.parse
|
compat_urllib_parse = urllib.parse
|
||||||
|
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||||
compat_urllib_parse_quote = urllib.parse.quote
|
compat_urllib_parse_quote = urllib.parse.quote
|
||||||
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
|
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
|
||||||
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
|
compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
|
||||||
|
@ -90,8 +98,10 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
compat_urllib_request = urllib.request
|
compat_urllib_request = urllib.request
|
||||||
compat_urllib_request_DataHandler = urllib.request.DataHandler
|
compat_urllib_request_DataHandler = urllib.request.DataHandler
|
||||||
compat_urllib_response = urllib.response
|
compat_urllib_response = urllib.response
|
||||||
compat_urlretrieve = urllib.request.urlretrieve
|
compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve
|
||||||
compat_xml_parse_error = etree.ParseError
|
compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
compat_zip = zip
|
compat_zip = zip
|
||||||
workaround_optparse_bug9161 = lambda: None
|
workaround_optparse_bug9161 = lambda: None
|
||||||
|
|
||||||
|
legacy = []
|
||||||
|
|
|
@ -239,6 +239,7 @@
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
)
|
)
|
||||||
|
from .blerp import BlerpIE
|
||||||
from .blogger import BloggerIE
|
from .blogger import BloggerIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
|
@ -861,6 +862,7 @@
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
|
from .kommunetv import KommunetvIE
|
||||||
from .kompas import KompasVideoIE
|
from .kompas import KompasVideoIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .koo import KooIE
|
from .koo import KooIE
|
||||||
|
@ -1460,6 +1462,7 @@
|
||||||
PuhuTVIE,
|
PuhuTVIE,
|
||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
)
|
)
|
||||||
|
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
|
||||||
from .prankcast import PrankCastIE
|
from .prankcast import PrankCastIE
|
||||||
from .premiershiprugby import PremiershipRugbyIE
|
from .premiershiprugby import PremiershipRugbyIE
|
||||||
from .presstv import PressTVIE
|
from .presstv import PressTVIE
|
||||||
|
@ -1521,6 +1524,10 @@
|
||||||
RayWenderlichCourseIE,
|
RayWenderlichCourseIE,
|
||||||
)
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
|
from .rbgtum import (
|
||||||
|
RbgTumIE,
|
||||||
|
RbgTumCourseIE,
|
||||||
|
)
|
||||||
from .rcs import (
|
from .rcs import (
|
||||||
RCSIE,
|
RCSIE,
|
||||||
RCSEmbedsIE,
|
RCSEmbedsIE,
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenIE(InfoExtractor):
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
|
@ -72,6 +72,12 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -100,7 +106,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# ATK Season
|
# ATK Season
|
||||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||||
|
@ -117,29 +123,73 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
'title': 'Season 12',
|
'title': 'Season 12',
|
||||||
},
|
},
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
|
}, {
|
||||||
|
# America's Test Kitchen Series
|
||||||
|
'url': 'https://www.americastestkitchen.com/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'americastestkitchen',
|
||||||
|
'title': 'America\'s Test Kitchen',
|
||||||
|
},
|
||||||
|
'playlist_count': 558,
|
||||||
|
}, {
|
||||||
|
# Cooks Country Series
|
||||||
|
'url': 'https://www.americastestkitchen.com/cookscountry',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cookscountry',
|
||||||
|
'title': 'Cook\'s Country',
|
||||||
|
},
|
||||||
|
'playlist_count': 199,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cookscountry/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cooksillustrated/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cooksillustrated.com',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_path, season_number = self._match_valid_url(url).group('show', 'id')
|
season_number, show1, show = self._match_valid_url(url).group('season', 'show', 'show2')
|
||||||
season_number = int(season_number)
|
show_path = ('/' + show) if show else ''
|
||||||
|
show = show or show1
|
||||||
|
season_number = int_or_none(season_number)
|
||||||
|
|
||||||
slug = 'cco' if show_path == '/cookscountry' else 'atk'
|
slug, title = {
|
||||||
|
'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
|
||||||
|
'cookscountry': ('cco', 'Cook\'s Country'),
|
||||||
|
'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
|
||||||
|
}[show]
|
||||||
|
|
||||||
season = 'Season %d' % season_number
|
facet_filters = [
|
||||||
|
'search_document_klass:episode',
|
||||||
|
'search_show_slug:' + slug,
|
||||||
|
]
|
||||||
|
|
||||||
|
if season_number:
|
||||||
|
playlist_id = 'season_%d' % season_number
|
||||||
|
playlist_title = 'Season %d' % season_number
|
||||||
|
facet_filters.append('search_season_list:' + playlist_title)
|
||||||
|
else:
|
||||||
|
playlist_id = show
|
||||||
|
playlist_title = title
|
||||||
|
|
||||||
season_search = self._download_json(
|
season_search = self._download_json(
|
||||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||||
season, headers={
|
playlist_id, headers={
|
||||||
'Origin': 'https://www.americastestkitchen.com',
|
'Origin': 'https://www.americastestkitchen.com',
|
||||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||||
}, query={
|
}, query={
|
||||||
'facetFilters': json.dumps([
|
'facetFilters': json.dumps(facet_filters),
|
||||||
'search_season_list:' + season,
|
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||||
'search_document_klass:episode',
|
|
||||||
'search_show_slug:' + slug,
|
|
||||||
]),
|
|
||||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
|
||||||
'attributesToHighlight': '',
|
'attributesToHighlight': '',
|
||||||
'hitsPerPage': 1000,
|
'hitsPerPage': 1000,
|
||||||
})
|
})
|
||||||
|
@ -162,4 +212,4 @@ def entries():
|
||||||
}
|
}
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries(), 'season_%d' % season_number, season)
|
entries(), playlist_id, playlist_title)
|
||||||
|
|
167
yt_dlp/extractor/blerp.py
Normal file
167
yt_dlp/extractor/blerp.py
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import strip_or_none, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class BlerpIE(InfoExtractor):
|
||||||
|
IE_NAME = 'blerp'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6320fe8745636cb4dd677a5a',
|
||||||
|
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
|
||||||
|
'uploader': 'luminousaj',
|
||||||
|
'uploader_id': '5fb81e51aa66ae000c395478',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5bc94ef4796001000498429f',
|
||||||
|
'title': 'Yee',
|
||||||
|
'uploader': '179617322678353920',
|
||||||
|
'uploader_id': '5ba99cf71386730004552c42',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
|
||||||
|
_GRAPHQL_QUERY = (
|
||||||
|
'''query webBitePageGetBite($_id: MongoID!) {
|
||||||
|
web {
|
||||||
|
biteById(_id: $_id) {
|
||||||
|
...bitePageFrag
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment bitePageFrag on Bite {
|
||||||
|
_id
|
||||||
|
title
|
||||||
|
userKeywords
|
||||||
|
keywords
|
||||||
|
color
|
||||||
|
visibility
|
||||||
|
isPremium
|
||||||
|
owned
|
||||||
|
price
|
||||||
|
extraReview
|
||||||
|
isAudioExists
|
||||||
|
image {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
userReactions {
|
||||||
|
_id
|
||||||
|
reactions
|
||||||
|
createdAt
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
topReactions
|
||||||
|
totalSaveCount
|
||||||
|
saved
|
||||||
|
blerpLibraryType
|
||||||
|
license
|
||||||
|
licenseMetaData
|
||||||
|
playCount
|
||||||
|
totalShareCount
|
||||||
|
totalFavoriteCount
|
||||||
|
totalAddedToBoardCount
|
||||||
|
userCategory
|
||||||
|
userAudioQuality
|
||||||
|
audioCreationState
|
||||||
|
transcription
|
||||||
|
userTranscription
|
||||||
|
description
|
||||||
|
createdAt
|
||||||
|
updatedAt
|
||||||
|
author
|
||||||
|
listingType
|
||||||
|
ownerObject {
|
||||||
|
_id
|
||||||
|
username
|
||||||
|
profileImage {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
transcription
|
||||||
|
favorited
|
||||||
|
visibility
|
||||||
|
isCurated
|
||||||
|
sourceUrl
|
||||||
|
audienceRating
|
||||||
|
strictAudienceRating
|
||||||
|
ownerId
|
||||||
|
reportObject {
|
||||||
|
reportedContentStatus
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
giphy {
|
||||||
|
mp4
|
||||||
|
gif
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
audio {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
mp3 {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
|
||||||
|
''')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'operationName': self._GRAPHQL_OPERATIONNAME,
|
||||||
|
'query': self._GRAPHQL_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'_id': audio_id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
|
||||||
|
json_result = self._download_json('https://api.blerp.com/graphql',
|
||||||
|
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
|
||||||
|
|
||||||
|
bite_json = json_result['data']['web']['biteById']
|
||||||
|
|
||||||
|
info_dict = {
|
||||||
|
'id': bite_json['_id'],
|
||||||
|
'url': bite_json['audio']['mp3']['url'],
|
||||||
|
'title': bite_json['title'],
|
||||||
|
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
|
||||||
|
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
|
||||||
|
}
|
||||||
|
|
||||||
|
return info_dict
|
|
@ -1,9 +1,5 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||||
traverse_obj,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class CallinIE(InfoExtractor):
|
class CallinIE(InfoExtractor):
|
||||||
|
@ -35,6 +31,54 @@ class CallinIE(InfoExtractor):
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
|
'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||||
|
'md5': '14ede27ee2c957b7e4db93140fc0745c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
|
||||||
|
'ext': 'ts',
|
||||||
|
'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
|
||||||
|
'description': 'Or, why the government doesn’t like SpaceX',
|
||||||
|
'channel': 'The Pull Request',
|
||||||
|
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
|
||||||
|
'duration': 3182.472,
|
||||||
|
'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
|
||||||
|
'uploader_url': 'http://thepullrequest.com',
|
||||||
|
'upload_date': '20220902',
|
||||||
|
'episode': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
|
||||||
|
'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||||
|
'series': 'The Pull Request',
|
||||||
|
'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638',
|
||||||
|
'view_count': int,
|
||||||
|
'uploader': 'Antonio García Martínez',
|
||||||
|
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
|
||||||
|
'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
|
||||||
|
'timestamp': 1662100688.005,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
|
||||||
|
'md5': '16f704ddbf82a27e3930533b12062f07',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
|
||||||
|
'ext': 'ts',
|
||||||
|
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
|
||||||
|
'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
|
||||||
|
'channel': 'The DEBRIEF With Briahna Joy Gray',
|
||||||
|
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
|
||||||
|
'duration': 10043.16,
|
||||||
|
'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
|
||||||
|
'uploader_url': 'http://patreon.com/badfaithpodcast',
|
||||||
|
'upload_date': '20220826',
|
||||||
|
'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
|
||||||
|
'display_id': 'episode-',
|
||||||
|
'series': 'The DEBRIEF With Briahna Joy Gray',
|
||||||
|
'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7',
|
||||||
|
'view_count': int,
|
||||||
|
'uploader': 'Briahna Gray',
|
||||||
|
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
|
||||||
|
'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
|
||||||
|
'timestamp': 1661476708.282,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def try_get_user_name(self, d):
|
def try_get_user_name(self, d):
|
||||||
|
@ -86,6 +130,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': id,
|
'id': id,
|
||||||
|
'_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|
|
@ -1,9 +1,5 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import int_or_none, url_or_none
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
|
||||||
url_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class CamModelsIE(InfoExtractor):
|
class CamModelsIE(InfoExtractor):
|
||||||
|
@ -17,32 +13,11 @@ class CamModelsIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
user_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
url, user_id, headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
manifest_root = self._html_search_regex(
|
|
||||||
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
|
|
||||||
|
|
||||||
if not manifest_root:
|
|
||||||
ERRORS = (
|
|
||||||
("I'm offline, but let's stay connected", 'This user is currently offline'),
|
|
||||||
('in a private show', 'This user is in a private show'),
|
|
||||||
('is currently performing LIVE', 'This model is currently performing live'),
|
|
||||||
)
|
|
||||||
for pattern, message in ERRORS:
|
|
||||||
if pattern in webpage:
|
|
||||||
error = message
|
|
||||||
expected = True
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
error = 'Unable to find manifest URL root'
|
|
||||||
expected = False
|
|
||||||
raise ExtractorError(error, expected=expected)
|
|
||||||
|
|
||||||
manifest = self._download_json(
|
manifest = self._download_json(
|
||||||
'%s%s.json' % (manifest_root, user_id), user_id)
|
'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
thumbnails = []
|
||||||
for format_id, format_dict in manifest['formats'].items():
|
for format_id, format_dict in manifest['formats'].items():
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
|
@ -82,12 +57,20 @@ def _real_extract(self, url):
|
||||||
'quality': -10,
|
'quality': -10,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
if format_id == 'jpeg':
|
||||||
|
thumbnails.append({
|
||||||
|
'url': f['url'],
|
||||||
|
'width': f['width'],
|
||||||
|
'height': f['height'],
|
||||||
|
'format_id': f['format_id'],
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': user_id,
|
'id': user_id,
|
||||||
'title': user_id,
|
'title': user_id,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18
|
'age_limit': 18
|
||||||
|
|
|
@ -1338,7 +1338,7 @@ def _get_tfa_info(self, note='two-factor verification code'):
|
||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
|
||||||
property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
|
property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
|
||||||
% {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
|
% {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
|
|
|
@ -1,17 +1,20 @@
|
||||||
import re
|
import re
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_parse_qs
|
||||||
compat_parse_qs,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
error_to_compat_str,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,14 +23,90 @@ def _call_api(self, slug):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
|
||||||
|
try:
|
||||||
|
return self._call_api(slug)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||||
|
e.cause.args = e.cause.args or [
|
||||||
|
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||||
|
raise ExtractorError(
|
||||||
|
'Content not found: expired?', cause=e.cause,
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _extract_video_info(self, video, fatal=True):
|
||||||
|
video_id = video['videoId']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
refs = traverse_obj(video, 'refs', expected_type=dict) or {}
|
||||||
|
|
||||||
|
m3u8_url = url_or_none(refs.get('m3uUrl'))
|
||||||
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
f4m_url = url_or_none(refs.get('f4mUrl'))
|
||||||
|
if f4m_url:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
|
for asset in (video.get('assets') or []):
|
||||||
|
asset_url = url_or_none(asset.get('url'))
|
||||||
|
if not asset_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': asset_url,
|
||||||
|
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
||||||
|
'fps': int_or_none(asset.get('frame_rate')),
|
||||||
|
'height': int_or_none(asset.get('height')),
|
||||||
|
'width': int_or_none(asset.get('width')),
|
||||||
|
})
|
||||||
|
|
||||||
|
mezzanine_url = traverse_obj(
|
||||||
|
video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
|
||||||
|
if mezzanine_url:
|
||||||
|
formats.append({
|
||||||
|
'ext': determine_ext(mezzanine_url, 'mp4'),
|
||||||
|
'format_id': 'mezzanine',
|
||||||
|
'quality': 1,
|
||||||
|
'url': mezzanine_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
thumbnails = traverse_obj(
|
||||||
|
video, ('thumbnails', ..., {'url': 'url'}), expected_type=url_or_none)
|
||||||
|
tags = traverse_obj(
|
||||||
|
video, ('tags', ..., 'displayName'),
|
||||||
|
expected_type=lambda x: x.strip() or None)
|
||||||
|
|
||||||
|
metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
|
||||||
|
title = traverse_obj(
|
||||||
|
metadata, 'longTitle', 'title', 'name',
|
||||||
|
expected_type=lambda x: x.strip() or None)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': strip_or_none(metadata.get('description')),
|
||||||
|
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
||||||
|
'duration': int_or_none(metadata.get('duration')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
|
'tags': tags,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(IGNBaseIE):
|
class IGNIE(IGNBaseIE):
|
||||||
"""
|
"""
|
||||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||||
Some videos of it.ign.com are also supported
|
Some videos of it.ign.com are also supported
|
||||||
"""
|
"""
|
||||||
|
_VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
|
||||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
_PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
|
||||||
|
_VALID_URL = (
|
||||||
|
r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
|
||||||
|
% '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
|
||||||
IE_NAME = 'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
_PAGE_TYPE = 'video'
|
_PAGE_TYPE = 'video'
|
||||||
|
|
||||||
|
@ -42,7 +121,13 @@ class IGNIE(IGNBaseIE):
|
||||||
'timestamp': 1370440800,
|
'timestamp': 1370440800,
|
||||||
'upload_date': '20130605',
|
'upload_date': '20130605',
|
||||||
'tags': 'count:9',
|
'tags': 'count:9',
|
||||||
}
|
'display_id': 'the-last-of-us-review',
|
||||||
|
'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2014/03/26/lastofusreviewmimig2.jpg',
|
||||||
|
'duration': 440,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'nocheckcertificate': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||||
|
@ -54,84 +139,48 @@ class IGNIE(IGNBaseIE):
|
||||||
'timestamp': 1420571160,
|
'timestamp': 1420571160,
|
||||||
'upload_date': '20150106',
|
'upload_date': '20150106',
|
||||||
'tags': 'count:4',
|
'tags': 'count:4',
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_embed_urls(cls, url, webpage):
|
||||||
|
grids = re.findall(
|
||||||
|
r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
|
||||||
|
webpage)
|
||||||
|
return filter(None,
|
||||||
|
(urljoin(url, m.group('path')) for m in re.finditer(
|
||||||
|
r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
|
||||||
|
% cls._VIDEO_PATH_RE, grids[0] if grids else '')))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id, filt = self._match_valid_url(url).group('id', 'filt')
|
||||||
video = self._call_api(display_id)
|
if display_id:
|
||||||
video_id = video['videoId']
|
return self._extract_video(url, display_id)
|
||||||
metadata = video['metadata']
|
return self._extract_playlist(url, filt or 'all')
|
||||||
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
|
|
||||||
|
|
||||||
formats = []
|
def _extract_playlist(self, url, display_id):
|
||||||
refs = video.get('refs') or {}
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
m3u8_url = refs.get('m3uUrl')
|
return self.playlist_result(
|
||||||
if m3u8_url:
|
(self.url_result(u, self.ie_key())
|
||||||
formats.extend(self._extract_m3u8_formats(
|
for u in self._extract_embed_urls(url, webpage)),
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
playlist_id=display_id)
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
|
|
||||||
f4m_url = refs.get('f4mUrl')
|
def _extract_video(self, url, display_id):
|
||||||
if f4m_url:
|
video = self._checked_call_api(display_id)
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
|
|
||||||
for asset in (video.get('assets') or []):
|
info = self._extract_video_info(video)
|
||||||
asset_url = asset.get('url')
|
|
||||||
if not asset_url:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': asset_url,
|
|
||||||
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
|
||||||
'fps': int_or_none(asset.get('frame_rate')),
|
|
||||||
'height': int_or_none(asset.get('height')),
|
|
||||||
'width': int_or_none(asset.get('width')),
|
|
||||||
})
|
|
||||||
|
|
||||||
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
|
return merge_dicts({
|
||||||
if mezzanine_url:
|
|
||||||
formats.append({
|
|
||||||
'ext': determine_ext(mezzanine_url, 'mp4'),
|
|
||||||
'format_id': 'mezzanine',
|
|
||||||
'quality': 1,
|
|
||||||
'url': mezzanine_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for thumbnail in (video.get('thumbnails') or []):
|
|
||||||
thumbnail_url = thumbnail.get('url')
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'url': thumbnail_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
tags = []
|
|
||||||
for tag in (video.get('tags') or []):
|
|
||||||
display_name = tag.get('displayName')
|
|
||||||
if not display_name:
|
|
||||||
continue
|
|
||||||
tags.append(display_name)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': strip_or_none(metadata.get('description')),
|
|
||||||
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
|
||||||
'duration': int_or_none(metadata.get('duration')),
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'thumbnails': thumbnails,
|
}, info)
|
||||||
'formats': formats,
|
|
||||||
'tags': tags,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class IGNVideoIE(InfoExtractor):
|
class IGNVideoIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||||
|
@ -143,7 +192,16 @@ class IGNVideoIE(InfoExtractor):
|
||||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||||
'timestamp': 1444665600,
|
'timestamp': 1444665600,
|
||||||
'upload_date': '20151012',
|
'upload_date': '20151012',
|
||||||
}
|
'display_id': '112203',
|
||||||
|
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
|
||||||
|
'duration': 298,
|
||||||
|
'tags': 'count:13',
|
||||||
|
'display_id': '112203',
|
||||||
|
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
|
||||||
|
'duration': 298,
|
||||||
|
'tags': 'count:13',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 400: Bad Request'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -163,22 +221,38 @@ class IGNVideoIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
|
parsed_url = urllib.parse.urlparse(url)
|
||||||
url = self._request_webpage(req, video_id).geturl()
|
embed_url = urllib.parse.urlunparse(
|
||||||
|
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
|
||||||
|
|
||||||
|
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
|
||||||
|
new_url = urlh.geturl()
|
||||||
ign_url = compat_parse_qs(
|
ign_url = compat_parse_qs(
|
||||||
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
|
urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
|
||||||
if ign_url:
|
if ign_url:
|
||||||
return self.url_result(ign_url, IGNIE.ie_key())
|
return self.url_result(ign_url, IGNIE.ie_key())
|
||||||
return self.url_result(url)
|
video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
|
||||||
|
if not video:
|
||||||
|
if new_url == url:
|
||||||
|
raise ExtractorError('Redirect loop: ' + url)
|
||||||
|
return self.url_result(new_url)
|
||||||
|
video = extract_attributes(video)
|
||||||
|
video_data = video.get('data-settings') or '{}'
|
||||||
|
video_data = self._parse_json(video_data, video_id)['video']
|
||||||
|
info = self._extract_video_info(video_data)
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'display_id': video_id,
|
||||||
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class IGNArticleIE(IGNBaseIE):
|
class IGNArticleIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||||
_PAGE_TYPE = 'article'
|
_PAGE_TYPE = 'article'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '524497489e4e8ff5848ece34',
|
'id': '72113',
|
||||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
|
@ -186,34 +260,43 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5ebbd138523268b93c9141af17bec937',
|
'id': '5ebbd138523268b93c9141af17bec937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GTA 5 Video Review',
|
'title': 'Grand Theft Auto V Video Review',
|
||||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
'timestamp': 1379339880,
|
'timestamp': 1379339880,
|
||||||
'upload_date': '20130916',
|
'upload_date': '20130916',
|
||||||
|
'tags': 'count:12',
|
||||||
|
'thumbnail': 'https://assets1.ignimgs.com/thumbs/userUploaded/2021/8/16/gta-v-heistsjpg-e94705-1629138553533.jpeg',
|
||||||
|
'display_id': 'grand-theft-auto-v-video-review',
|
||||||
|
'duration': 501,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
'title': 'GTA 5 In Slow Motion',
|
||||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
'timestamp': 1386878820,
|
'timestamp': 1386878820,
|
||||||
'upload_date': '20131212',
|
'upload_date': '20131212',
|
||||||
|
'duration': 202,
|
||||||
|
'tags': 'count:25',
|
||||||
|
'display_id': 'gta-5-in-slow-motion',
|
||||||
|
'thumbnail': 'https://assets1.ignimgs.com/vid/thumbnails/user/2013/11/03/GTA-SLO-MO-1.jpg',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
'params': {
|
'params': {
|
||||||
'playlist_items': '2-3',
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53ee806780a81ec46e0790f8',
|
'id': '53ee806780a81ec46e0790f8',
|
||||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 1,
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
# videoId pattern
|
# videoId pattern
|
||||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
|
@ -236,18 +319,84 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
|
||||||
|
try:
|
||||||
|
return self._call_api(slug)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, urllib.error.HTTPError):
|
||||||
|
e.cause.args = e.cause.args or [
|
||||||
|
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||||
|
if e.cause.code == 404:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Content not found: expired?', cause=e.cause,
|
||||||
|
expected=True)
|
||||||
|
elif e.cause.code == 503:
|
||||||
|
self.report_warning(error_to_compat_str(e.cause))
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
article = self._call_api(display_id)
|
article = self._checked_call_api(display_id)
|
||||||
|
|
||||||
|
if article:
|
||||||
|
# obsolete ?
|
||||||
def entries():
|
def entries():
|
||||||
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
|
media_url = traverse_obj(
|
||||||
|
article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
|
||||||
|
expected_type=url_or_none)
|
||||||
if media_url:
|
if media_url:
|
||||||
yield self.url_result(media_url, IGNIE.ie_key())
|
yield self.url_result(media_url, IGNIE.ie_key())
|
||||||
for content in (article.get('content') or []):
|
for content in (article.get('content') or []):
|
||||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||||
|
if url_or_none(video_url):
|
||||||
yield self.url_result(video_url)
|
yield self.url_result(video_url)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries(), article.get('articleId'),
|
entries(), article.get('articleId'),
|
||||||
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
|
traverse_obj(
|
||||||
|
article, ('metadata', 'headline'),
|
||||||
|
expected_type=lambda x: x.strip() or None))
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
|
||||||
|
if playlist_id:
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for m in re.finditer(
|
||||||
|
r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
|
||||||
|
webpage):
|
||||||
|
flashvars = self._search_regex(
|
||||||
|
r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
|
||||||
|
m.group('params'), 'flashvars', default='')
|
||||||
|
flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
|
||||||
|
v_url = url_or_none((flashvars.get('url') or [None])[-1])
|
||||||
|
if v_url:
|
||||||
|
yield self.url_result(v_url)
|
||||||
|
else:
|
||||||
|
playlist_id = self._search_regex(
|
||||||
|
r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
|
||||||
|
webpage, 'id', group='id', default=None)
|
||||||
|
|
||||||
|
nextjs_data = self._search_nextjs_data(webpage, display_id)
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for player in traverse_obj(
|
||||||
|
nextjs_data,
|
||||||
|
('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
|
||||||
|
# skip promo links (which may not always be served, eg GH CI servers)
|
||||||
|
if traverse_obj(nextjs_data,
|
||||||
|
('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
|
||||||
|
expected_type=dict):
|
||||||
|
continue
|
||||||
|
video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
|
||||||
|
info = self._extract_video_info(video, fatal=False)
|
||||||
|
if info:
|
||||||
|
yield merge_dicts({
|
||||||
|
'display_id': display_id,
|
||||||
|
}, info)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), playlist_id or display_id,
|
||||||
|
re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)
|
||||||
|
|
31
yt_dlp/extractor/kommunetv.py
Normal file
31
yt_dlp/extractor/kommunetv.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import update_url
|
||||||
|
|
||||||
|
|
||||||
|
class KommunetvIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https://(\w+).kommunetv.no/archive/(?P<id>\w+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://oslo.kommunetv.no/archive/921',
|
||||||
|
'md5': '5f102be308ee759be1e12b63d5da4bbc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '921',
|
||||||
|
'title': 'Bystyremøte',
|
||||||
|
'ext': 'mp4'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
headers = {
|
||||||
|
'Accept': 'application/json'
|
||||||
|
}
|
||||||
|
data = self._download_json('https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id, video_id, headers=headers)
|
||||||
|
title = data['stream']['title']
|
||||||
|
file = data['playlist'][0]['playlist'][0]['file']
|
||||||
|
url = update_url(file, query=None, fragment=None)
|
||||||
|
formats = self._extract_m3u8_formats(url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title
|
||||||
|
}
|
|
@ -1,5 +1,16 @@
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json
|
from ..utils import (
|
||||||
|
MONTH_NAMES,
|
||||||
|
clean_html,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_by_id,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MyVideoGeIE(InfoExtractor):
|
class MyVideoGeIE(InfoExtractor):
|
||||||
|
@ -11,37 +22,50 @@ class MyVideoGeIE(InfoExtractor):
|
||||||
'id': '3941048',
|
'id': '3941048',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The best prikol',
|
'title': 'The best prikol',
|
||||||
|
'upload_date': '20200611',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'md5:d72addd357b0dd914e704781f7f777d8',
|
'uploader': 'chixa33',
|
||||||
'description': 'md5:5c0371f540f5888d603ebfedd46b6df3'
|
'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
_MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
|
||||||
|
|
||||||
|
_quality = staticmethod(qualities(('SD', 'HD')))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
title = (
|
||||||
description = self._og_search_description(webpage)
|
self._og_search_title(webpage, default=None)
|
||||||
thumbnail = self._html_search_meta(['og:image'], webpage)
|
or clean_html(get_element_by_class('my_video_title', webpage))
|
||||||
uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
|
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
|
||||||
|
|
||||||
jwplayer_sources = self._parse_json(
|
jwplayer_sources = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r"(?s)jwplayer\(\"mvplayer\"\).setup\(.*?sources: (.*?])", webpage, 'jwplayer sources'),
|
r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
|
||||||
video_id, transform_source=js_to_json)
|
or '',
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
def _formats_key(f):
|
formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
|
||||||
if f['label'] == 'SD':
|
for f in formats or []:
|
||||||
return -1
|
f['quality'] = self._quality(f['format_id'])
|
||||||
elif f['label'] == 'HD':
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
jwplayer_sources = sorted(jwplayer_sources, key=_formats_key)
|
description = (
|
||||||
|
self._og_search_description(webpage)
|
||||||
|
or get_element_by_id('long_desc_holder', webpage)
|
||||||
|
or self._html_search_meta('description', webpage))
|
||||||
|
|
||||||
formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
|
uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
upload_date = get_element_by_class('mv_vid_upl_date', webpage)
|
||||||
|
# as ka locale may not be present roll a local date conversion
|
||||||
|
upload_date = (unified_strdate(
|
||||||
|
# translate any ka month to an en one
|
||||||
|
re.sub('|'.join(self._MONTH_NAMES_KA),
|
||||||
|
lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
|
||||||
|
upload_date, re.I))
|
||||||
|
if upload_date else None)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -49,5 +73,9 @@ def _formats_key(f):
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
|
||||||
|
'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
|
||||||
|
'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
|
||||||
}
|
}
|
||||||
|
|
97
yt_dlp/extractor/pr0gramm.py
Normal file
97
yt_dlp/extractor/pr0gramm.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import merge_dicts
|
||||||
|
|
||||||
|
|
||||||
|
class Pr0grammStaticIE(InfoExtractor):
|
||||||
|
# Possible urls:
|
||||||
|
# https://pr0gramm.com/static/5466437
|
||||||
|
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://pr0gramm.com/static/5466437',
|
||||||
|
'md5': '52fa540d70d3edc286846f8ca85938aa',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5466437',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'pr0gramm-5466437 by g11st',
|
||||||
|
'uploader': 'g11st',
|
||||||
|
'upload_date': '20221221',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# Fetch media sources
|
||||||
|
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||||
|
media_info = entries[0]
|
||||||
|
|
||||||
|
# Fetch author
|
||||||
|
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
|
||||||
|
|
||||||
|
# Fetch approx upload timestamp from filename
|
||||||
|
# Have None-defaults in case the extraction fails
|
||||||
|
uploadDay = None
|
||||||
|
uploadMon = None
|
||||||
|
uploadYear = None
|
||||||
|
uploadTimestr = None
|
||||||
|
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
|
||||||
|
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
|
||||||
|
|
||||||
|
if (m):
|
||||||
|
# Up to a day of accuracy should suffice...
|
||||||
|
uploadDay = m.groupdict().get('day')
|
||||||
|
uploadMon = m.groupdict().get('mon')
|
||||||
|
uploadYear = m.groupdict().get('year')
|
||||||
|
uploadTimestr = uploadYear + uploadMon + uploadDay
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'id': video_id,
|
||||||
|
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
|
||||||
|
'uploader': uploader,
|
||||||
|
'upload_date': uploadTimestr
|
||||||
|
}, media_info)
|
||||||
|
|
||||||
|
|
||||||
|
# This extractor is for the primary url (used for sharing, and appears in the
|
||||||
|
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
|
||||||
|
# video information here. So let's redirect to a compatibility version of
|
||||||
|
# the site, which does contain the <video>-element by itself, without requiring
|
||||||
|
# js to be ran.
|
||||||
|
class Pr0grammIE(InfoExtractor):
|
||||||
|
# Possible urls:
|
||||||
|
# https://pr0gramm.com/new/546637
|
||||||
|
# https://pr0gramm.com/new/video/546637
|
||||||
|
# https://pr0gramm.com/top/546637
|
||||||
|
# https://pr0gramm.com/top/video/546637
|
||||||
|
# https://pr0gramm.com/user/g11st/uploads/5466437
|
||||||
|
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
|
||||||
|
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
|
||||||
|
# https://pr0gramm.com/user/froschler/1elf/5232030
|
||||||
|
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
|
||||||
|
# https://pr0gramm.com/top/fruher war alles damals/5498175
|
||||||
|
|
||||||
|
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://pr0gramm.com/new/video/5466437',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5466437',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'pr0gramm-5466437 by g11st',
|
||||||
|
'uploader': 'g11st',
|
||||||
|
'upload_date': '20221221',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _generic_title():
|
||||||
|
return "oof"
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
'https://pr0gramm.com/static/' + video_id,
|
||||||
|
video_id=video_id,
|
||||||
|
ie=Pr0grammStaticIE.ie_key())
|
93
yt_dlp/extractor/rbgtum.py
Normal file
93
yt_dlp/extractor/rbgtum.py
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class RbgTumIE(InfoExtractor):
    """Extractor for single lecture recordings hosted on TUM's live.rbg.tum.de."""
    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The page embeds a direct HLS manifest URL; the <h1> carries the
        # lecture title and the <title> tag the series name ("TUM-Live | ..." prefix stripped).
        manifest_url = self._html_search_regex(r'(https://.+?\.m3u8)', page, 'm3u8')
        title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', page, 'title')
        series = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', page, 'series')

        return {
            'id': video_id,
            'title': title,
            'series': series,
            'formats': self._extract_m3u8_formats(
                manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'),
        }
|
|
||||||
|
|
||||||
|
class RbgTumCourseIE(InfoExtractor):
    """Playlist extractor for whole courses on live.rbg.tum.de; yields RbgTumIE entries."""
    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]

    def _real_extract(self, url):
        course_id = self._match_id(url)
        page = self._download_webpage(url, course_id)

        series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', page, 'title')

        # Collect links to individual lectures; the lookbehinds drop the
        # per-stream sub-views (camera/presentation/chat) so each lecture
        # appears once, via its combined view.
        entries = [
            self.url_result('https://live.rbg.tum.de/w/' + lecture_path, ie=RbgTumIE.ie_key())
            for lecture_path in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', page)
        ]

        return self.playlist_result(entries, course_id, series_title)
|
|
@ -130,6 +130,9 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
|
||||||
|
|
||||||
URLS = (
|
URLS = (
|
||||||
r'dood\.(?:to|watch|so|pm|wf|re)',
|
r'dood\.(?:to|watch|so|pm|wf|re)',
|
||||||
|
# Sites youtube-dl supports, but we won't
|
||||||
|
r'https://viewsb\.com',
|
||||||
|
r'https://filemoon\.sx',
|
||||||
)
|
)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
|
|
@ -313,17 +313,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
\.
|
\.
|
||||||
)?
|
)?
|
||||||
vimeo\.com/
|
vimeo\.com/
|
||||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
|
||||||
(?:[^/]+/)*?
|
|
||||||
(?:
|
(?:
|
||||||
|
(?P<u>user)|
|
||||||
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
|
(?:.*?/)??
|
||||||
|
(?P<q>
|
||||||
(?:
|
(?:
|
||||||
play_redirect_hls|
|
play_redirect_hls|
|
||||||
moogaloop\.swf)\?clip_id=
|
moogaloop\.swf)\?clip_id=
|
||||||
)?
|
)?
|
||||||
(?:videos?/)?
|
(?:videos?/)?
|
||||||
|
)
|
||||||
(?P<id>[0-9]+)
|
(?P<id>[0-9]+)
|
||||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
(?(u)
|
||||||
/?(?:[?&].*)?(?:[#].*)?$
|
/(?!videos|likes)[^/?#]+/?|
|
||||||
|
(?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
|
||||||
|
)
|
||||||
|
(?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'vimeo'
|
IE_NAME = 'vimeo'
|
||||||
_EMBED_REGEX = [
|
_EMBED_REGEX = [
|
||||||
|
@ -705,7 +711,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
# user playlist alias -> https://vimeo.com/258705797
|
||||||
|
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
]
|
]
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
|
|
||||||
|
|
||||||
class XHamsterIE(InfoExtractor):
|
class XHamsterIE(InfoExtractor):
|
||||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
|
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:.+?\.)?%s/
|
(?:.+?\.)?%s/
|
||||||
|
@ -120,6 +120,9 @@ class XHamsterIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
|
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -422,6 +425,9 @@ class XHamsterUserIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://xhday.com/users/mobhunter',
|
'url': 'https://xhday.com/users/mobhunter',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://xhvid.com/users/pelushe21',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, user_id):
|
def _entries(self, user_id):
|
||||||
|
|
|
@ -3149,14 +3149,28 @@ def urlencode_postdata(*args, **kargs):
|
||||||
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
|
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
def update_url_query(url, query):
|
def update_url(url, *, query_update=None, **kwargs):
    """Return *url* with the components given in kwargs replaced.

    @param url            str or already-parsed URL tuple
    @param query_update   mapping merged into the existing query string
    @returns str
    """
    if isinstance(url, str):
        # Fast path: nothing to change, skip the parse/unparse round-trip
        if not query_update and not kwargs:
            return url
        url = urllib.parse.urlparse(url)
    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        merged = dict(urllib.parse.parse_qs(url.query))
        merged.update(query_update)
        kwargs['query'] = urllib.parse.urlencode(merged, True)
    return urllib.parse.urlunparse(url._replace(**kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def update_url_query(url, query):
    """Add/replace query-string parameters of *url*; thin wrapper kept for existing callers."""
    return update_url(url, query_update=query)
|
||||||
|
|
||||||
|
|
||||||
def update_Request(req, url=None, data=None, headers=None, query=None):
|
def update_Request(req, url=None, data=None, headers=None, query=None):
|
||||||
|
|
Loading…
Reference in a new issue