[extractor/youtube] Extract concurrent view count for livestreams (#5152)

Adds a new field, `concurrent_view_count`, which reports how many users are currently watching a livestream.
Closes https://github.com/yt-dlp/yt-dlp/issues/4843

Authored by: coletdjnz
This commit is contained in:
Matthew 2022-10-07 20:00:40 +13:00 committed by GitHub
parent f03940963e
commit 867c66ff97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 8 deletions

View file

@ -1226,6 +1226,7 @@ # OUTPUT TEMPLATE
- `duration` (numeric): Length of the video in seconds
- `duration_string` (string): Length of the video (HH:mm:ss)
- `view_count` (numeric): How many users have watched the video on the platform
- `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
- `like_count` (numeric): Number of positive ratings of the video
- `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video

View file

@ -284,6 +284,7 @@ class InfoExtractor:
captions instead of normal subtitles
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
concurrent_view_count: How many users are currently watching the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
repost_count: Number of reposts of the video

View file

@ -912,8 +912,7 @@ def _extract_video(self, renderer):
traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
video_id, default=None, group='duration'))
view_count = self._get_count(renderer, 'viewCountText')
view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
channel_id = traverse_obj(
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
@ -932,6 +931,12 @@ def _extract_video(self, renderer):
if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
url = f'https://www.youtube.com/shorts/{video_id}'
live_status = (
'is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
else None)
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
@ -940,17 +945,12 @@ def _extract_video(self, renderer):
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'uploader': uploader,
'channel_id': channel_id,
'thumbnails': thumbnails,
'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key='youtubetab')
else None),
'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
else None),
'release_timestamp': scheduled_timestamp,
'availability':
'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
@ -958,7 +958,8 @@ def _extract_video(self, renderer):
is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count,
}
@ -2328,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'playable_in_embed': True,
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
'concurrent_view_count': int,
},
'params': {'skip_download': True}
}, {
@ -4115,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count),
})
vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
if vcr:
vc = self._get_count(vcr, 'viewCount')
# Upcoming premieres with waiting count are treated as live here
if vcr.get('isLive'):
info['concurrent_view_count'] = vc
elif info.get('view_count') is None:
info['view_count'] = vc
vsir = get_first(contents, 'videoSecondaryInfoRenderer')
if vsir:
vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))