mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 01:02:13 +01:00
Update to ytdl-2021.01.24.1
This commit is contained in:
parent
f74980cbae
commit
a820dc722e
23 changed files with 987 additions and 412 deletions
|
@ -814,7 +814,7 @@ # OUTPUT TEMPLATE
|
||||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||||
|
|
||||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dlc test video` and id `BaW_jenozKcj`, this will result in a `youtube-dlc test video-BaW_jenozKcj.mp4` file created in the current directory.
|
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dlc test video` and id `BaW_jenozKcj`, this will result in a `youtube-dlc test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
|
|
||||||
|
|
|
@ -637,13 +637,20 @@ def test_prepare_filename(self):
|
||||||
'title2': '%PATH%',
|
'title2': '%PATH%',
|
||||||
}
|
}
|
||||||
|
|
||||||
def fname(templ):
|
def fname(templ, na_placeholder='NA'):
|
||||||
ydl = YoutubeDL({'outtmpl': templ})
|
params = {'outtmpl': templ}
|
||||||
|
if na_placeholder != 'NA':
|
||||||
|
params['outtmpl_na_placeholder'] = na_placeholder
|
||||||
|
ydl = YoutubeDL(params)
|
||||||
return ydl.prepare_filename(info)
|
return ydl.prepare_filename(info)
|
||||||
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
|
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
|
||||||
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
||||||
# Replace missing fields with 'NA'
|
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
|
||||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
# Replace missing fields with 'NA' by default
|
||||||
|
self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
|
||||||
|
# Or by provided placeholder
|
||||||
|
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
|
||||||
|
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
|
||||||
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
||||||
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
||||||
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
||||||
|
|
|
@ -181,9 +181,12 @@ class YoutubeDL(object):
|
||||||
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
|
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
|
||||||
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
|
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
|
||||||
outtmpl: Template for output names.
|
outtmpl: Template for output names.
|
||||||
restrictfilenames: Do not allow "&" and spaces in file names.
|
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||||
trim_file_name: Limit length of filename (extension excluded).
|
restrictfilenames: Do not allow "&" and spaces in file names
|
||||||
ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
|
trim_file_name: Limit length of filename (extension excluded)
|
||||||
|
ignoreerrors: Do not stop on download errors
|
||||||
|
(Default True when running youtube-dlc,
|
||||||
|
but False when directly accessing YoutubeDL class)
|
||||||
force_generic_extractor: Force downloader to use the generic extractor
|
force_generic_extractor: Force downloader to use the generic extractor
|
||||||
overwrites: Overwrite all video and metadata files if True,
|
overwrites: Overwrite all video and metadata files if True,
|
||||||
overwrite only non-video files if None
|
overwrite only non-video files if None
|
||||||
|
@ -741,7 +744,7 @@ def prepare_filename(self, info_dict, warn=False):
|
||||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||||
for k, v in template_dict.items()
|
for k, v in template_dict.items()
|
||||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
|
||||||
|
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
|
|
||||||
|
@ -761,8 +764,8 @@ def prepare_filename(self, info_dict, warn=False):
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string NA placeholder is returned for missing fields. We will patch
|
||||||
# template for missing fields to meet string presentation type.
|
# output template for missing fields to meet string presentation type.
|
||||||
for numeric_field in self._NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
|
|
|
@ -373,6 +373,7 @@ def parse_retries(retries):
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
'listformats_table': opts.listformats_table,
|
'listformats_table': opts.listformats_table,
|
||||||
'outtmpl': outtmpl,
|
'outtmpl': outtmpl,
|
||||||
|
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
|
||||||
'paths': opts.paths,
|
'paths': opts.paths,
|
||||||
'autonumber_size': opts.autonumber_size,
|
'autonumber_size': opts.autonumber_size,
|
||||||
'autonumber_start': opts.autonumber_start,
|
'autonumber_start': opts.autonumber_start,
|
||||||
|
|
|
@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||||
'title': 'Ancient Aliens',
|
'title': 'Ancient Aliens',
|
||||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 168,
|
'playlist_mincount': 150,
|
||||||
}]
|
}]
|
||||||
_RESOURCE = 'series'
|
_RESOURCE = 'series'
|
||||||
_ITEMS_KEY = 'episodes'
|
_ITEMS_KEY = 'episodes'
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class AlJazeeraIE(InfoExtractor):
|
class AlJazeeraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3792260579001',
|
'id': '3792260579001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||||
'add_ie': ['BrightcoveNew'],
|
'add_ie': ['BrightcoveNew'],
|
||||||
'skip': 'Not accessible from Travis CI server',
|
'skip': 'Not accessible from Travis CI server',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
program_name = self._match_id(url)
|
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, program_name)
|
post_type = {
|
||||||
brightcove_id = self._search_regex(
|
'features': 'post',
|
||||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
'program': 'episode',
|
||||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
'videos': 'video',
|
||||||
|
}[post_type.split('/')[0]]
|
||||||
|
video = self._download_json(
|
||||||
|
'https://www.aljazeera.com/graphql', name, query={
|
||||||
|
'operationName': 'SingleArticleQuery',
|
||||||
|
'variables': json.dumps({
|
||||||
|
'name': name,
|
||||||
|
'postType': post_type,
|
||||||
|
}),
|
||||||
|
}, headers={
|
||||||
|
'wp-site': 'aje',
|
||||||
|
})['data']['article']['video']
|
||||||
|
video_id = video['id']
|
||||||
|
account_id = video.get('accountId') or '665003303001'
|
||||||
|
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||||
|
'BrightcoveNew', video_id)
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||||
'thumbnail': r're:^https?://',
|
'thumbnail': r're:^https?://',
|
||||||
'timestamp': 1523664000,
|
'timestamp': 1523318400,
|
||||||
'upload_date': '20180414',
|
'upload_date': '20180410',
|
||||||
'release_date': '20180410',
|
'release_date': '20180410',
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
'season_number': 18,
|
'season_number': 18,
|
||||||
|
@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||||
|
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||||
|
'md5': '06451608c57651e985a498e69cec17e5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5fbe8c61bda2010001c6763b',
|
||||||
|
'title': 'Simple Chicken Dinner',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||||
|
'thumbnail': r're:^https?://',
|
||||||
|
'timestamp': 1610755200,
|
||||||
|
'upload_date': '20210116',
|
||||||
|
'release_date': '20210116',
|
||||||
|
'series': "America's Test Kitchen",
|
||||||
|
'season_number': 21,
|
||||||
|
'episode': 'Simple Chicken Dinner',
|
||||||
|
'episode_number': 3,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -60,7 +84,76 @@ def _real_extract(self, url):
|
||||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||||
'ie_key': 'Zype',
|
'ie_key': 'Zype',
|
||||||
'description': clean_html(video.get('description')),
|
'description': clean_html(video.get('description')),
|
||||||
|
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||||
'release_date': unified_strdate(video.get('publishDate')),
|
'release_date': unified_strdate(video.get('publishDate')),
|
||||||
|
'episode_number': int_or_none(episode.get('number')),
|
||||||
|
'season_number': int_or_none(episode.get('season')),
|
||||||
'series': try_get(episode, lambda x: x['show']['title']),
|
'series': try_get(episode, lambda x: x['show']['title']),
|
||||||
'episode': episode.get('title'),
|
'episode': episode.get('title'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# ATK Season
|
||||||
|
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'season_1',
|
||||||
|
'title': 'Season 1',
|
||||||
|
},
|
||||||
|
'playlist_count': 13,
|
||||||
|
}, {
|
||||||
|
# Cooks Country Season
|
||||||
|
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'season_12',
|
||||||
|
'title': 'Season 12',
|
||||||
|
},
|
||||||
|
'playlist_count': 13,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
||||||
|
season_number = int(season_number)
|
||||||
|
|
||||||
|
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||||
|
|
||||||
|
season = 'Season %d' % season_number
|
||||||
|
|
||||||
|
season_search = self._download_json(
|
||||||
|
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||||
|
season, headers={
|
||||||
|
'Origin': 'https://www.%s.com' % show_name,
|
||||||
|
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||||
|
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||||
|
}, query={
|
||||||
|
'facetFilters': json.dumps([
|
||||||
|
'search_season_list:' + season,
|
||||||
|
'search_document_klass:episode',
|
||||||
|
'search_show_slug:' + slug,
|
||||||
|
]),
|
||||||
|
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||||
|
'attributesToHighlight': '',
|
||||||
|
'hitsPerPage': 1000,
|
||||||
|
})
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for episode in (season_search.get('hits') or []):
|
||||||
|
search_url = episode.get('search_url')
|
||||||
|
if not search_url:
|
||||||
|
continue
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||||
|
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||||
|
'title': episode.get('title'),
|
||||||
|
'description': episode.get('description'),
|
||||||
|
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||||
|
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), 'season_%d' % season_number, season)
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .yahoo import YahooIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
@ -15,9 +15,9 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AolIE(InfoExtractor):
|
class AolIE(YahooIE):
|
||||||
IE_NAME = 'aol.com'
|
IE_NAME = 'aol.com'
|
||||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video with 5min ID
|
# video with 5min ID
|
||||||
|
@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Yahoo video
|
||||||
|
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
if '-' in video_id:
|
||||||
|
return self._extract_yahoo_video(video_id, 'us')
|
||||||
|
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||||
|
|
|
@ -226,13 +226,13 @@ def _real_extract(self, url):
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
return GenericIE()._extract_rss(url, video_id, doc)
|
return GenericIE()._extract_rss(url, video_id, doc)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||||
r'<h4 class="headline">(.*?)</h4>',
|
r'<h4 class="headline">(.*?)</h4>',
|
||||||
r'<title[^>]*>(.*?)</title>'],
|
r'<title[^>]*>(.*?)</title>'],
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
description = self._html_search_meta(
|
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||||
'dcterms.abstract', webpage, 'description', default=None)
|
'dcterms.abstract', webpage, 'description', default=None)
|
||||||
if description is None:
|
if description is None:
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
|
@ -289,18 +289,18 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 14.02.2019
|
# available till 7.01.2022
|
||||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
'display_id': 'maischberger-die-woche',
|
||||||
'id': '102',
|
'id': '100',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 4435.0,
|
'duration': 3687.0,
|
||||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||||
'upload_date': '20180214',
|
'upload_date': '20210107',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -355,17 +355,17 @@ def _real_extract(self, url):
|
||||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'die-robuste-roswita',
|
'display_id': 'die-robuste-roswita',
|
||||||
'id': '70153354',
|
'id': '78566716',
|
||||||
'title': 'Die robuste Roswita',
|
'title': 'Die robuste Roswita',
|
||||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||||
'duration': 5316,
|
'duration': 5316,
|
||||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||||
'timestamp': 1577047500,
|
'timestamp': 1596658200,
|
||||||
'upload_date': '20191222',
|
'upload_date': '20200805',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
|
|
@ -1,142 +1,51 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
|
||||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
|
||||||
/(?P<title>.*)'''
|
|
||||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
|
||||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
'md5': 'b8acb347177c680ff18a292aa2166f80',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
|
||||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
|
||||||
'timestamp': 1376798400,
|
'timestamp': 1598670000,
|
||||||
'upload_date': '20130818',
|
'upload_date': '20200829',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
|
||||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
|
||||||
/(?P<id>[^?]+)'''
|
|
||||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
|
||||||
'info_dict': {
|
|
||||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
|
||||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
|
||||||
},
|
|
||||||
'playlist_count': 4,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
|
|
||||||
videos_info = self._get_videos_info(mgid)
|
|
||||||
return videos_info
|
|
||||||
|
|
||||||
|
|
||||||
class ToshIE(MTVServicesInfoExtractor):
|
|
||||||
IE_DESC = 'Tosh.0'
|
|
||||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
|
||||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
|
||||||
'info_dict': {
|
|
||||||
'description': 'Tosh asked fans to share their summer plans.',
|
|
||||||
'title': 'Twitter Users Share Summer Plans',
|
|
||||||
},
|
|
||||||
'playlist': [{
|
|
||||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
|
||||||
'description': 'Tosh asked fans to share their summer plans.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
# It's really reported to be published on year 2077
|
|
||||||
'upload_date': '20770610',
|
|
||||||
'timestamp': 3390510600,
|
|
||||||
'subtitles': {
|
|
||||||
'en': 'mincount:3',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
}, {
|
|
||||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
'title': 'Josh Investigates',
|
||||||
|
'description': 'Steht uns das Ende der Welt bevor?',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||||
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _get_feed_query(self, uri):
|
||||||
video_id = self._match_id(url)
|
return {
|
||||||
|
'accountOverride': 'intl.mtvi.com',
|
||||||
webpage = self._download_webpage(url, video_id)
|
'arcEp': 'web.cc.tv',
|
||||||
|
'ep': 'b9032c3a',
|
||||||
mrss_url = self._search_regex(
|
'imageEp': 'web.cc.tv',
|
||||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
'mgid': uri,
|
||||||
webpage, 'mrss url', group='url')
|
|
||||||
|
|
||||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralShortnameIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': ':tds',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': ':thedailyshow',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': ':theopposition',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
shortcut_map = {
|
|
||||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
|
||||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
|
||||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
|
||||||
}
|
}
|
||||||
return self.url_result(shortcut_map[video_id])
|
|
||||||
|
|
|
@ -50,7 +50,10 @@
|
||||||
AnimeLabIE,
|
AnimeLabIE,
|
||||||
AnimeLabShowsIE,
|
AnimeLabShowsIE,
|
||||||
)
|
)
|
||||||
from .americastestkitchen import AmericasTestKitchenIE
|
from .americastestkitchen import (
|
||||||
|
AmericasTestKitchenIE,
|
||||||
|
AmericasTestKitchenSeasonIE,
|
||||||
|
)
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
|
@ -244,11 +247,8 @@
|
||||||
)
|
)
|
||||||
from .coub import CoubIE
|
from .coub import CoubIE
|
||||||
from .comedycentral import (
|
from .comedycentral import (
|
||||||
ComedyCentralFullEpisodesIE,
|
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
ComedyCentralShortnameIE,
|
|
||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
ToshIE,
|
|
||||||
)
|
)
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
|
@ -682,6 +682,11 @@
|
||||||
MildomVodIE,
|
MildomVodIE,
|
||||||
MildomUserVodIE,
|
MildomUserVodIE,
|
||||||
)
|
)
|
||||||
|
from .minds import (
|
||||||
|
MindsIE,
|
||||||
|
MindsChannelIE,
|
||||||
|
MindsGroupIE,
|
||||||
|
)
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
|
@ -1162,6 +1167,10 @@
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
|
from .spotify import (
|
||||||
|
SpotifyIE,
|
||||||
|
SpotifyShowIE,
|
||||||
|
)
|
||||||
from .spreaker import (
|
from .spreaker import (
|
||||||
SpreakerIE,
|
SpreakerIE,
|
||||||
SpreakerPageIE,
|
SpreakerPageIE,
|
||||||
|
@ -1270,7 +1279,10 @@
|
||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
from .trovolive import TrovoLiveIE
|
from .trovo import (
|
||||||
|
TrovoIE,
|
||||||
|
TrovoVodIE,
|
||||||
|
)
|
||||||
from .trunews import TruNewsIE
|
from .trunews import TruNewsIE
|
||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
class FranceCultureIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rendez-vous-au-pays-des-geeks',
|
'id': 'rendez-vous-au-pays-des-geeks',
|
||||||
|
@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
|
||||||
'title': 'Rendez-vous au pays des geeks',
|
'title': 'Rendez-vous au pays des geeks',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'upload_date': '20140301',
|
'upload_date': '20140301',
|
||||||
'timestamp': 1393642916,
|
'timestamp': 1393700400,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# no thumbnail
|
||||||
|
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
@ -36,19 +40,19 @@ def _real_extract(self, url):
|
||||||
</h1>|
|
</h1>|
|
||||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||||
).*?
|
).*?
|
||||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||||
''',
|
''',
|
||||||
webpage, 'video data'))
|
webpage, 'video data'))
|
||||||
|
|
||||||
video_url = video_data['data-asset-source']
|
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default=None)
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', default=None)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<span class="author">(.*?)</span>',
|
r'(?s)<span class="author">(.*?)</span>',
|
||||||
webpage, 'uploader', default=None)
|
webpage, 'uploader', default=None)
|
||||||
|
@ -64,6 +68,6 @@ def _real_extract(self, url):
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'vcodec': 'none' if ext == 'mp3' else None,
|
'vcodec': 'none' if ext == 'mp3' else None,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||||
'duration': int_or_none(video_data.get('data-duration')),
|
'duration': int_or_none(video_data.get('data-duration')),
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,10 @@
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -139,6 +145,7 @@ def _real_extract(self, url):
|
||||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||||
else:
|
else:
|
||||||
display_id = display_id.replace(':', '#')
|
display_id = display_id.replace(':', '#')
|
||||||
|
display_id = compat_urllib_parse_unquote(display_id)
|
||||||
uri = 'lbry://' + display_id
|
uri = 'lbry://' + display_id
|
||||||
result = self._resolve_url(uri, display_id, 'stream')
|
result = self._resolve_url(uri, display_id, 'stream')
|
||||||
result_value = result['value']
|
result_value = result['value']
|
||||||
|
|
196
youtube_dlc/extractor/minds.py
Normal file
196
youtube_dlc/extractor/minds.py
Normal file
|
@ -0,0 +1,196 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    str_or_none,
    strip_or_none,
)
|
||||||
|
|
||||||
|
|
||||||
|
class MindsBaseIE(InfoExtractor):
    """Common base for the Minds extractors: URL prefix and JSON API access."""

    # Scheme + host prefix; subclasses append their own path pattern to it.
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download and return the JSON document at ``/api/<path>``.

        ``resource`` is only used in the progress note shown to the user;
        ``query`` is passed through as the request's query parameters.
        """
        endpoint = 'https://www.minds.com/api/' + path

        # Echo the XSRF cookie (when one has been set for this URL) back in
        # the matching request header; send an empty value otherwise.
        xsrf_cookie = self._get_cookies(endpoint).get('XSRF-TOKEN')
        if xsrf_cookie:
            xsrf_value = xsrf_cookie.value
        else:
            xsrf_value = ''

        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_value,
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            endpoint, video_id, note, headers=request_headers, query=query)
|
||||||
|
|
||||||
|
|
||||||
|
class MindsIE(MindsBaseIE):
    """Extractor for a single Minds post (media, newsfeed or archive URL)."""

    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the entity behind ``url`` and return its info dict.

        Activities whose ``custom_type`` is not ``video`` are delegated to
        whichever extractor handles their ``perma_url``.

        Raises ExtractorError (expected) when a non-activity entity is not
        of subtype ``video``.
        """
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                # The activity only wraps the video; use the wrapped guid.
                video_id = entity['entity_guid']
            else:
                return self.url_result(entity['perma_url'])
        else:
            # Validate API data explicitly: an ``assert`` is stripped under
            # ``python -O`` and otherwise surfaces as an unhandled crash
            # instead of a regular extractor error.
            if entity.get('subtype') != 'video':
                raise ExtractorError(
                    'The given url does not contain a video',
                    expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response; fall back to the
        # one fetched above when it is absent.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        # A lone tag may arrive as a plain string; normalise it to a list.
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        # Request the poster and report the URL the response finally came
        # from (``geturl()``); failure to fetch it is not fatal.
        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class MindsFeedBaseIE(MindsBaseIE):
    """Base for Minds playlist extractors (feeds of videos).

    Subclasses are expected to define ``_FEED_PATH`` (the ``v1`` API path
    segment) and ``_FEED_TYPE`` (the key of the feed object in the API
    response, also used in the progress note).
    """

    # Number of entities requested per API page.
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield a url_result for every video of the feed, page by page."""
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        i = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % i, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # A response without a 'load-next' cursor is treated like an
            # empty cursor (end of feed) instead of raising KeyError after
            # the entries of this page were already yielded.
            query['from_timestamp'] = data.get('load-next')
            # Stop when there is no cursor or the page came back short.
            if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
                break
            i += 1

    def _real_extract(self, url):
        """Fetch the feed object for ``url`` and return it as a playlist."""
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
|
||||||
|
|
||||||
|
|
||||||
|
class MindsChannelIE(MindsFeedBaseIE):
    """Playlist extractor for the videos of a Minds channel."""

    # Feed configuration consumed by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'

    IE_NAME = 'minds:' + _FEED_TYPE

    # Any single path segment, except the prefixes reserved for other pages.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'

    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
|
||||||
|
|
||||||
|
|
||||||
|
class MindsGroupIE(MindsFeedBaseIE):
    """Playlist extractor for the videos of a Minds group."""

    # Feed configuration consumed by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'

    IE_NAME = 'minds:' + _FEED_TYPE

    # Groups are addressed by their numeric profile id.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }
|
|
@ -255,6 +255,10 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
||||||
|
|
||||||
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_child_with_type(parent, t):
|
||||||
|
return next(c for c in parent['children'] if c.get('type') == t)
|
||||||
|
|
||||||
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
|
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
|
||||||
if url == '':
|
if url == '':
|
||||||
return
|
return
|
||||||
|
@ -332,6 +336,13 @@ def _extract_mgid(self, webpage, url, title=None, data_zone=None):
|
||||||
if not mgid:
|
if not mgid:
|
||||||
mgid = self._extract_triforce_mgid(webpage, data_zone)
|
mgid = self._extract_triforce_mgid(webpage, data_zone)
|
||||||
|
|
||||||
|
if not mgid:
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
|
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||||
|
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||||
|
mgid = video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
return mgid
|
return mgid
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def extract_child_with_type(parent, t):
|
|
||||||
children = parent['children']
|
|
||||||
return next(c for c in children if c.get('type') == t)
|
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
|
||||||
data = self._parse_json(self._search_regex(
|
|
||||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
|
||||||
main_container = self.extract_child_with_type(data, 'MainContainer')
|
|
||||||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
|
||||||
return video_player['props']['media']['video']['config']['uri']
|
|
||||||
|
|
||||||
|
|
||||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'mtvjapan'
|
IE_NAME = 'mtvjapan'
|
||||||
|
|
|
@ -1,104 +1,125 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import str_to_int
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NineGagIE(InfoExtractor):
|
class NineGagIE(InfoExtractor):
|
||||||
IE_NAME = '9gag'
|
IE_NAME = '9gag'
|
||||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'kXzwOKyGlSA',
|
'id': 'ae5Ag7B',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
'title': 'Capybara Agility Training',
|
||||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
'upload_date': '20191108',
|
||||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
'timestamp': 1573237208,
|
||||||
'uploader': 'CompilationChannel',
|
'categories': ['Awesome'],
|
||||||
'upload_date': '20131110',
|
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
|
||||||
'view_count': int,
|
'duration': 44,
|
||||||
},
|
'like_count': int,
|
||||||
'add_ie': ['Youtube'],
|
'dislike_count': int,
|
||||||
}, {
|
'comment_count': int,
|
||||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
}
|
||||||
'info_dict': {
|
|
||||||
'id': 'aKolP3',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
|
||||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
|
||||||
'uploader_id': 'rickmereki',
|
|
||||||
'uploader': 'Rick Mereki',
|
|
||||||
'upload_date': '20110803',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'add_ie': ['Vimeo'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.com/tv/p/KklwM',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.tv/p/Kk2X5',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
_EXTERNAL_VIDEO_PROVIDER = {
|
|
||||||
'1': {
|
|
||||||
'url': '%s',
|
|
||||||
'ie_key': 'Youtube',
|
|
||||||
},
|
|
||||||
'2': {
|
|
||||||
'url': 'http://player.vimeo.com/video/%s',
|
|
||||||
'ie_key': 'Vimeo',
|
|
||||||
},
|
|
||||||
'3': {
|
|
||||||
'url': 'http://instagram.com/p/%s',
|
|
||||||
'ie_key': 'Instagram',
|
|
||||||
},
|
|
||||||
'4': {
|
|
||||||
'url': 'http://vine.co/v/%s',
|
|
||||||
'ie_key': 'Vine',
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
post_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
post = self._download_json(
|
||||||
display_id = mobj.group('display_id') or video_id
|
'https://9gag.com/v1/post', post_id, query={
|
||||||
|
'id': post_id
|
||||||
|
})['data']['post']
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
if post.get('type') != 'Animated':
|
||||||
|
raise ExtractorError(
|
||||||
|
'The given url does not contain a video',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
post_view = self._parse_json(
|
title = post['title']
|
||||||
self._search_regex(
|
|
||||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
|
||||||
webpage, 'post view'),
|
|
||||||
display_id)
|
|
||||||
|
|
||||||
ie_key = None
|
duration = None
|
||||||
source_url = post_view.get('sourceUrl')
|
formats = []
|
||||||
if not source_url:
|
thumbnails = []
|
||||||
external_video_id = post_view['videoExternalId']
|
for key, image in (post.get('images') or {}).items():
|
||||||
external_video_provider = post_view['videoExternalProvider']
|
image_url = url_or_none(image.get('url'))
|
||||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
if not image_url:
|
||||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
continue
|
||||||
title = post_view['title']
|
ext = determine_ext(image_url)
|
||||||
description = post_view.get('description')
|
image_id = key.strip('image')
|
||||||
view_count = str_to_int(post_view.get('externalView'))
|
common = {
|
||||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
}
|
||||||
|
if ext in ('jpg', 'png'):
|
||||||
|
webp_url = image.get('webpUrl')
|
||||||
|
if webp_url:
|
||||||
|
t = common.copy()
|
||||||
|
t.update({
|
||||||
|
'id': image_id + '-webp',
|
||||||
|
'url': webp_url,
|
||||||
|
})
|
||||||
|
thumbnails.append(t)
|
||||||
|
common.update({
|
||||||
|
'id': image_id,
|
||||||
|
'ext': ext,
|
||||||
|
})
|
||||||
|
thumbnails.append(common)
|
||||||
|
elif ext in ('webm', 'mp4'):
|
||||||
|
if not duration:
|
||||||
|
duration = int_or_none(image.get('duration'))
|
||||||
|
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
|
||||||
|
for vcodec in ('vp8', 'vp9', 'h265'):
|
||||||
|
c_url = image.get(vcodec + 'Url')
|
||||||
|
if not c_url:
|
||||||
|
continue
|
||||||
|
c_f = common.copy()
|
||||||
|
c_f.update({
|
||||||
|
'format_id': image_id + '-' + vcodec,
|
||||||
|
'url': c_url,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
})
|
||||||
|
formats.append(c_f)
|
||||||
|
common.update({
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': image_id,
|
||||||
|
})
|
||||||
|
formats.append(common)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
section = try_get(post, lambda x: x['postSection']['name'])
|
||||||
|
|
||||||
|
tags = None
|
||||||
|
post_tags = post.get('tags')
|
||||||
|
if post_tags:
|
||||||
|
tags = []
|
||||||
|
for tag in post_tags:
|
||||||
|
tag_key = tag.get('key')
|
||||||
|
if not tag_key:
|
||||||
|
continue
|
||||||
|
tags.append(tag_key)
|
||||||
|
|
||||||
|
get_count = lambda x: int_or_none(post.get(x + 'Count'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'id': post_id,
|
||||||
'url': source_url,
|
|
||||||
'ie_key': ie_key,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'timestamp': int_or_none(post.get('creationTs')),
|
||||||
'view_count': view_count,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'like_count': get_count('upVote'),
|
||||||
|
'dislike_count': get_count('downVote'),
|
||||||
|
'comment_count': get_count('comments'),
|
||||||
|
'age_limit': 18 if post.get('nsfw') == 1 else None,
|
||||||
|
'categories': [section] if section else None,
|
||||||
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,30 +6,40 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NJPWWorldIE(InfoExtractor):
|
class NJPWWorldIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||||
IE_DESC = '新日本プロレスワールド'
|
IE_DESC = '新日本プロレスワールド'
|
||||||
_NETRC_MACHINE = 'njpwworld'
|
_NETRC_MACHINE = 'njpwworld'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
|
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 's_series_00155_1_9',
|
'id': 's_series_00155_1_9',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
|
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
|
||||||
'tags': list,
|
'tags': list,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # AES-encrypted m3u8
|
'skip_download': True, # AES-encrypted m3u8
|
||||||
},
|
},
|
||||||
'skip': 'Requires login',
|
'skip': 'Requires login',
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's_series_00563_16_bs',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
|
||||||
|
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||||
|
|
||||||
|
@ -64,35 +74,27 @@ def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
|
||||||
player = extract_attributes(mobj.group(0))
|
player_path = '/intent?id=%s&type=url' % vid
|
||||||
player_path = player.get('href')
|
|
||||||
if not player_path:
|
|
||||||
continue
|
|
||||||
kind = self._search_regex(
|
|
||||||
r'(low|high)$', player.get('class') or '', 'kind',
|
|
||||||
default='low')
|
|
||||||
player_url = compat_urlparse.urljoin(url, player_path)
|
player_url = compat_urlparse.urljoin(url, player_path)
|
||||||
player_page = self._download_webpage(
|
formats.append({
|
||||||
player_url, video_id, note='Downloading player page')
|
'url': player_url,
|
||||||
entries = self._parse_html5_media_entries(
|
'format_id': kind,
|
||||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
'ext': 'mp4',
|
||||||
m3u8_entry_protocol='m3u8_native')
|
'protocol': 'm3u8',
|
||||||
kind_formats = entries[0]['formats']
|
'quality': 2 if kind == 'high' else 1,
|
||||||
for f in kind_formats:
|
})
|
||||||
f['quality'] = 2 if kind == 'high' else 1
|
|
||||||
formats.extend(kind_formats)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
post_content = get_element_by_class('post-content', webpage)
|
tag_block = get_element_by_class('tag-block', webpage)
|
||||||
tags = re.findall(
|
tags = re.findall(
|
||||||
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
|
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
|
||||||
) if post_content else None
|
) if tag_block else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
|
||||||
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
||||||
_GEO_COUNTRIES = ['US']
|
_GEO_COUNTRIES = ['US']
|
||||||
|
|
||||||
def _extract_mgid(self, webpage, url):
|
|
||||||
mgid = None
|
|
||||||
|
|
||||||
if not mgid:
|
|
||||||
mgid = self._extract_triforce_mgid(webpage)
|
|
||||||
|
|
||||||
if not mgid:
|
|
||||||
mgid = self._extract_new_triforce_mgid(webpage, url)
|
|
||||||
|
|
||||||
return mgid
|
|
||||||
|
|
||||||
# TODO Remove - Reason: Outdated Site
|
|
||||||
|
|
||||||
|
|
||||||
class ParamountNetworkIE(MTVServicesInfoExtractor):
|
class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||||
|
@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
return {
|
return {
|
||||||
'arcEp': 'paramountnetwork.com',
|
'arcEp': 'paramountnetwork.com',
|
||||||
|
'imageEp': 'paramountnetwork.com',
|
||||||
'mgid': uri,
|
'mgid': uri,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_mgid(self, webpage, url):
|
|
||||||
root_data = self._parse_json(self._search_regex(
|
|
||||||
r'window\.__DATA__\s*=\s*({.+})',
|
|
||||||
webpage, 'data'), None)
|
|
||||||
|
|
||||||
def find_sub_data(data, data_type):
|
|
||||||
return next(c for c in data['children'] if c.get('type') == data_type)
|
|
||||||
|
|
||||||
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
|
||||||
return c['props']['media']['video']['config']['uri']
|
|
||||||
|
|
156
youtube_dlc/extractor/spotify.py
Normal file
156
youtube_dlc/extractor/spotify.py
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_podcast_url,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SpotifyBaseIE(InfoExtractor):
    """Shared plumbing for the Spotify extractors.

    Obtains an access token on initialization, wraps the persisted-query
    endpoint, and turns an episode JSON object into an info dict.
    """

    _ACCESS_TOKEN = None
    # sha256 hashes of the persisted queries, keyed by operation name
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'

    def _real_initialize(self):
        """Download the access token that later API calls send as a bearer token."""
        token_response = self._download_json(
            'https://open.spotify.com/get_access_token', None)
        self._ACCESS_TOKEN = token_response['accessToken']

    def _call_api(self, operation, video_id, variables):
        """Run the persisted query ``operation`` and return the ``data`` member of the reply.

        ``variables`` is JSON-encoded into the request; ``operation`` must be
        a key of ``_OPERATION_HASHES``.
        """
        extensions = {
            'persistedQuery': {
                'sha256Hash': self._OPERATION_HASHES[operation],
            },
        }
        response = self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query', video_id,
            query={
                'operationName': 'query' + operation,
                'variables': json.dumps(variables),
                'extensions': json.dumps(extensions),
            },
            headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})
        return response['data']

    def _extract_episode(self, episode, series):
        """Convert one episode JSON object into an info dict tagged with ``series``."""
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []

        # Preview audio: the preview URL is rewritten to the podcast host.
        preview = episode.get('audioPreview') or {}
        preview_url = preview.get('url')
        if preview_url:
            preview_format = {
                'url': preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            format_name = preview.get('format')
            if format_name:
                preview_format['format_id'] = format_name
                # Format names look like <3-char codec>_[<LABEL>_]<bitrate>.
                match = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', format_name)
                if match:
                    codec, bitrate = match.groups()
                    preview_format['abr'] = int(bitrate)
                    preview_format['ext'] = codec.lower()
            formats.append(preview_format)

        # Externally hosted audio items that carry a URL.
        audio_items = try_get(episode, lambda x: x['audio']['items']) or []
        formats.extend(
            {
                'url': clean_podcast_url(audio_item['url']),
                'vcodec': 'none',
            }
            for audio_item in audio_items
            if audio_item.get('url') and audio_item.get('externallyHosted'))

        cover_sources = try_get(episode, lambda x: x['coverArt']['sources']) or []
        thumbnails = [{
            'url': cover['url'],
            'width': int_or_none(cover.get('width')),
            'height': int_or_none(cover.get('height')),
        } for cover in cover_sources if cover.get('url')]

        description = strip_or_none(episode.get('description'))
        duration_ms = try_get(
            episode, lambda x: x['duration']['totalMilliseconds'])
        release_iso = try_get(
            episode, lambda x: x['releaseDate']['isoString'])

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            'duration': float_or_none(duration_ms, 1000),
            'release_date': unified_strdate(release_iso),
            'series': series,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class SpotifyIE(SpotifyBaseIE):
    """Extractor for a single Spotify episode URL."""

    IE_NAME = 'spotify'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TEST = {
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        }
    }

    def _real_extract(self, url):
        """Fetch the episode via the ``Episode`` query and build its info dict."""
        episode_id = self._match_id(url)
        api_variables = {
            'uri': 'spotify:episode:' + episode_id
        }
        episode = self._call_api('Episode', episode_id, api_variables)['episode']
        # The series name is optional; try_get yields None when it is absent.
        series = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series)
|
||||||
|
|
||||||
|
|
||||||
|
class SpotifyShowIE(SpotifyBaseIE):
    """Extractor for a Spotify show URL; yields a playlist of its episodes."""

    IE_NAME = 'spotify:show'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }

    def _real_extract(self, url):
        """Fetch the show via the ``ShowEpisodes`` query and return a playlist result."""
        show_id = self._match_id(url)
        # A very large limit is sent so the episodes come back in one request.
        api_variables = {
            'limit': 1000000000,
            'offset': 0,
            'uri': 'spotify:show:' + show_id,
        }
        podcast = self._call_api('ShowEpisodes', show_id, api_variables)['podcast']
        podcast_name = podcast.get('name')

        items = try_get(podcast, lambda x: x['episodes']['items']) or []
        # Items without an episode payload are skipped.
        entries = [
            self._extract_episode(episode, podcast_name)
            for episode in (item.get('episode') for item in items)
            if episode]

        return self.playlist_result(
            entries, show_id, podcast_name, podcast.get('description'))
|
193
youtube_dlc/extractor/trovo.py
Normal file
193
youtube_dlc/extractor/trovo.py
Normal file
|
@ -0,0 +1,193 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TrovoBaseIE(InfoExtractor):
    """Shared URL prefix and streamer-metadata helper for the Trovo extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'

    def _extract_streamer_info(self, data):
        """Map the ``streamerInfo`` member of ``data`` onto uploader fields.

        A missing or empty ``streamerInfo`` yields ``None`` for every field.
        """
        streamer = data.get('streamerInfo') or {}
        username = streamer.get('userName')
        uploader_url = None
        if username:
            uploader_url = 'https://trovo.live/' + username
        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': uploader_url,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class TrovoIE(TrovoBaseIE):
    """Extractor for Trovo channel URLs (anything that is not ``clip/`` or ``video/``)."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Query the live info for the channel and return a live-stream info dict.

        Raises an expected ``ExtractorError`` when the API reports
        ``isLive == 0``.
        """
        username = self._match_id(url)
        live_query = '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
coverUrl
id
streamInfo {
desc
playUrl
}
title
}
streamerInfo {
nickName
uid
userName
}
}
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username, query={
                'query': live_query,
            })
        live_info = response['data']['getLiveInfo']
        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)

        program_info = live_info['programInfo']
        program_id = program_info['id']
        title = self._live_title(program_info['title'])

        formats = []
        for stream in (program_info.get('streamInfo') or []):
            play_url = stream.get('playUrl')
            if play_url:
                quality = stream.get('desc')
                # The height is parsed from the label minus its last character.
                height = int_or_none(quality[:-1]) if quality else None
                formats.append({
                    'format_id': quality,
                    'height': height,
                    'url': play_url,
                })
        self._sort_formats(formats)

        info = {
            'id': program_id,
            'title': title,
            'formats': formats,
            'thumbnail': program_info.get('coverUrl'),
            'is_live': True,
        }
        streamer_fields = self._extract_streamer_info(live_info)
        info.update(streamer_fields)
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class TrovoVodIE(TrovoBaseIE):
    """Extractor for Trovo ``video/`` and ``clip/`` URLs."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch VOD details and comments in one batched request and build the info dict."""
        vid = self._match_id(url)

        detail_query = '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid
        comment_query = '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
nickName
uid
}
commentID
content
createdAt
parentID
}
}
}''' % vid
        # Two queries are posted as one JSON array; the reply is indexed the same way.
        payload = json.dumps([{
            'query': detail_query,
        }, {
            'query': comment_query,
        }]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload, headers={
                'Content-Type': 'application/json',
            })

        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for play_info in (vod_info.get('playInfos') or []):
            play_url = play_info.get('playUrl')
            if play_url:
                quality = play_info.get('desc')
                # The height is parsed from the label minus its last character.
                height = int_or_none(quality[:-1]) if quality else None
                formats.append({
                    'ext': 'mp4',
                    'filesize': int_or_none(play_info.get('fileSize')),
                    'format_id': quality,
                    'height': height,
                    'language': language,
                    'protocol': 'm3u8_native',
                    'tbr': int_or_none(play_info.get('bitrate')),
                    'url': play_url,
                })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        def get_count(kind):
            # Counters are stored under '<kind>Num'.
            return int_or_none(vod_info.get(kind + 'Num'))

        def build_comment(raw):
            author = raw.get('author') or {}
            parent = raw.get('parentID')
            if parent == 0:
                # A parentID of 0 is reported as the 'root' parent.
                parent_id = 'root'
            else:
                parent_id = str_or_none(parent)
            return {
                'author': author.get('nickName'),
                'author_id': str_or_none(author.get('uid')),
                'id': str_or_none(raw.get('commentID')),
                'text': raw.get('content'),
                'timestamp': int_or_none(raw.get('createdAt')),
                'parent': parent_id,
            }

        comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
        # Comments without content are dropped.
        comments = [
            build_comment(raw) for raw in comment_list if raw.get('content')]

        info = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': get_count('watch'),
            'like_count': get_count('like'),
            'comment_count': get_count('comment'),
            'comments': comments,
            'categories': [category] if category else None,
        }
        streamer_fields = self._extract_streamer_info(vod_detail_info)
        info.update(streamer_fields)
        return info
|
|
@ -1,12 +1,9 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_FORMATS = (
|
|
||||||
(200, 416, 234),
|
|
||||||
(400, 480, 270),
|
|
||||||
(600, 640, 360),
|
|
||||||
(1200, 640, 360),
|
|
||||||
(1800, 960, 540),
|
|
||||||
(2500, 1280, 720),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||||
|
@ -97,46 +85,20 @@ def extract_url(path_template, url_type):
|
||||||
return red_url
|
return red_url
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def remove_bitrate_limit(manifest_url):
|
|
||||||
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
try:
|
|
||||||
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
|
|
||||||
manifest_urls = self._download_json(
|
manifest_urls = self._download_json(
|
||||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||||
m3u8_url = manifest_urls.get('hls')
|
m3u8_url = manifest_urls.get('hls')
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
m3u8_url = remove_bitrate_limit(m3u8_url)
|
|
||||||
for m3u8_alt_url in alt_urls(m3u8_url):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_alt_url, video_id, 'mp4',
|
m3u8_url, video_id, 'mp4',
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
|
||||||
video_id, f4m_id='hds', fatal=False))
|
|
||||||
mpd_url = manifest_urls.get('mpd')
|
mpd_url = manifest_urls.get('mpd')
|
||||||
if mpd_url:
|
if mpd_url:
|
||||||
mpd_url = remove_bitrate_limit(mpd_url)
|
|
||||||
for mpd_alt_url in alt_urls(mpd_url):
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
|
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
|
||||||
|
video_id, mpd_id='dash', fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
except ExtractorError:
|
|
||||||
abr = 64
|
|
||||||
for vbr, width, height in self._FORMATS:
|
|
||||||
tbr = vbr + abr
|
|
||||||
format_id = 'http-%s' % tbr
|
|
||||||
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
|
|
||||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': fmt_url,
|
|
||||||
'vbr': vbr,
|
|
||||||
'abr': abr,
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
|
|
||||||
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
|
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
|
||||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||||
|
|
|
@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_yahoo_video(self, video_id, country):
|
||||||
url, country, display_id = re.match(self._VALID_URL, url).groups()
|
|
||||||
if not country:
|
|
||||||
country = 'us'
|
|
||||||
else:
|
|
||||||
country = country.split('-')[0]
|
|
||||||
api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
|
|
||||||
|
|
||||||
for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
|
|
||||||
content = self._download_json(
|
|
||||||
api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
|
|
||||||
display_id, 'Downloading content JSON metadata', fatal=i == 1)
|
|
||||||
if content:
|
|
||||||
item = content['items'][0]
|
|
||||||
break
|
|
||||||
|
|
||||||
if item.get('type') != 'video':
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
cover = item.get('cover') or {}
|
|
||||||
if cover.get('type') == 'yvideo':
|
|
||||||
cover_url = cover.get('url')
|
|
||||||
if cover_url:
|
|
||||||
entries.append(self.url_result(
|
|
||||||
cover_url, 'Yahoo', cover.get('uuid')))
|
|
||||||
|
|
||||||
for e in item.get('body', []):
|
|
||||||
if e.get('type') == 'videoIframe':
|
|
||||||
iframe_url = e.get('url')
|
|
||||||
if not iframe_url:
|
|
||||||
continue
|
|
||||||
entries.append(self.url_result(iframe_url))
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, item.get('uuid'),
|
|
||||||
item.get('title'), item.get('summary'))
|
|
||||||
|
|
||||||
video_id = item['uuid']
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
|
'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
|
||||||
video_id, 'Downloading video JSON metadata')[0]
|
video_id, 'Downloading video JSON metadata')[0]
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
|
@ -298,7 +261,6 @@ def _real_extract(self, url):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'display_id': display_id,
|
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'description': clean_html(video.get('description')),
|
'description': clean_html(video.get('description')),
|
||||||
'timestamp': parse_iso8601(video.get('publish_time')),
|
'timestamp': parse_iso8601(video.get('publish_time')),
|
||||||
|
@ -311,6 +273,44 @@ def _real_extract(self, url):
|
||||||
'episode_number': int_or_none(series_info.get('episode_number')),
|
'episode_number': int_or_none(series_info.get('episode_number')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Resolve a Yahoo article URL to a single video or a playlist of embeds.

    The article JSON is fetched from the ``caas`` content endpoint of the
    country subdomain ('us' when the URL has none). A 'video' item is handed
    to ``_extract_yahoo_video``; any other item becomes a playlist of its
    cover video and its 'videoIframe' body elements.
    """
    url, country, display_id = re.match(self._VALID_URL, url).groups()
    # Only the part before the first '-' of the matched country group is used.
    country = country.split('-')[0] if country else 'us'

    content = self._download_json(
        'https://%s.yahoo.com/caas/content/article' % country, display_id,
        'Downloading content JSON metadata', query={
            'url': url
        })
    item = content['items'][0]['data']['partnerData']

    if item.get('type') == 'video':
        info = self._extract_yahoo_video(item['uuid'], country)
        info['display_id'] = display_id
        return info

    entries = []

    cover = item.get('cover') or {}
    if cover.get('type') == 'yvideo':
        cover_url = cover.get('url')
        if cover_url:
            entries.append(self.url_result(
                cover_url, 'Yahoo', cover.get('uuid')))

    for element in (item.get('body') or []):
        if element.get('type') != 'videoIframe':
            continue
        iframe_url = element.get('url')
        if iframe_url:
            entries.append(self.url_result(iframe_url))

    return self.playlist_result(
        entries, item.get('uuid'),
        item.get('title'), item.get('summary'))
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = 'Yahoo screen search'
|
IE_DESC = 'Yahoo screen search'
|
||||||
|
|
|
@ -842,6 +842,10 @@ def _dict_from_multiple_values_options_callback(
|
||||||
'-o', '--output',
|
'-o', '--output',
|
||||||
dest='outtmpl', metavar='TEMPLATE',
|
dest='outtmpl', metavar='TEMPLATE',
|
||||||
help='Output filename template, see "OUTPUT TEMPLATE" for details')
|
help='Output filename template, see "OUTPUT TEMPLATE" for details')
|
||||||
|
filesystem.add_option(
|
||||||
|
'--output-na-placeholder',
|
||||||
|
dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
|
||||||
|
help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--autonumber-size',
|
'--autonumber-size',
|
||||||
dest='autonumber_size', metavar='NUMBER', type=int,
|
dest='autonumber_size', metavar='NUMBER', type=int,
|
||||||
|
@ -997,7 +1001,7 @@ def _dict_from_multiple_values_options_callback(
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'-x', '--extract-audio',
|
'-x', '--extract-audio',
|
||||||
action='store_true', dest='extractaudio', default=False,
|
action='store_true', dest='extractaudio', default=False,
|
||||||
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||||
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
|
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
|
||||||
|
|
Loading…
Reference in a new issue