From 6ddbccbc958d7925951ab6a1d035b3764f402759 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 00:02:24 +0100 Subject: [PATCH 1/9] Add new exception for parsing issues --- src/invidious/exceptions.cr | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/invidious/exceptions.cr diff --git a/src/invidious/exceptions.cr b/src/invidious/exceptions.cr new file mode 100644 index 00000000..391a574d --- /dev/null +++ b/src/invidious/exceptions.cr @@ -0,0 +1,8 @@ +# Exception used to hold the name of the missing item +# Should be used in all parsing functions +class BrokenTubeException < InfoException + getter element : String + + def initialize(@element) + end +end From 99091e919c9af56c27ca8aebd790c3b64b564f78 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 00:10:32 +0100 Subject: [PATCH 2/9] video parsing: raise if major root element is missing --- src/invidious/videos.cr | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index d77d56d2..b0d8b4d1 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -885,16 +885,24 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_ # Top level elements - primary_results = player_response - .dig?("contents", "twoColumnWatchNextResults", "results", "results", "contents") + main_results = player_response.dig?("contents", "twoColumnWatchNextResults") + + raise BrokenTubeException.new("twoColumnWatchNextResults") if !main_results + + primary_results = main_results.dig?("results", "results", "contents") + + raise BrokenTubeException.new("results") if !primary_results video_primary_renderer = primary_results - .try &.as_a.find(&.["videoPrimaryInfoRenderer"]?) - .try &.["videoPrimaryInfoRenderer"] + .as_a.find(&.["videoPrimaryInfoRenderer"]?) + .try &.["videoPrimaryInfoRenderer"] video_secondary_renderer = primary_results - .try &.as_a.find(&.["videoSecondaryInfoRenderer"]?) - .try &.["videoSecondaryInfoRenderer"] + .as_a.find(&.["videoSecondaryInfoRenderer"]?) + .try &.["videoSecondaryInfoRenderer"] + + raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer + raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer # Likes/dislikes From d7ebd763f54a3211aac02a2862775bf130029061 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 00:11:05 +0100 Subject: [PATCH 3/9] video parsing: add secondary_results root element --- src/invidious/videos.cr | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index b0d8b4d1..76f7123a 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -890,8 +890,11 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_ raise BrokenTubeException.new("twoColumnWatchNextResults") if !main_results primary_results = main_results.dig?("results", "results", "contents") + secondary_results = main_results + .dig?("secondaryResults", "secondaryResults", "results") raise BrokenTubeException.new("results") if !primary_results + raise BrokenTubeException.new("secondaryResults") if !secondary_results video_primary_renderer = primary_results .as_a.find(&.["videoPrimaryInfoRenderer"]?) From e6ddd6d6c1f649f43c5906f1090d800f619f37fd Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 00:44:52 +0100 Subject: [PATCH 4/9] make HelperExtractors non-private --- src/invidious/yt_backend/extractors.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 66b3cdef..27ce550b 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -505,7 +505,7 @@ end # # Mostly used to extract out repeated structures to deal with code # repetition. -private module HelperExtractors +module HelperExtractors # Retrieves the amount of videos present within the given InnerTube data. # # Returns a 0 when it's unable to do so From 9621175dc91d8f410dbc14d09bc0132e6a33ae6d Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 00:57:44 +0100 Subject: [PATCH 5/9] extractors: Add helper for short view count text --- src/invidious/yt_backend/extractors.cr | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 27ce550b..41d95962 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -519,6 +519,20 @@ module HelperExtractors end end + # Retrieves the amount of views/viewers a video has. + # Seems to be used on related videos only + # + # Returns "0" when unable to parse + def self.get_short_view_count(container : JSON::Any) : String + box = container["shortViewCountText"]? + return "0" if !box + + # Simpletext: "4M views" + # runs: {"text": "1.1K"},{"text":" watching"} + return box["simpleText"]?.try &.as_s.sub(" views", "") || + box.dig?("runs", 0, "text").try &.as_s || "0" + end + # Retrieve lowest quality thumbnail from InnerTube data # # TODO allow configuration of image quality (-1 is highest) From f124e8cf93ebc745777d1e36b4563e3de2cfad8a Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 01:36:42 +0100 Subject: [PATCH 6/9] Fix parsing of related videos --- src/invidious/videos.cr | 101 +++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 76f7123a..d8289506 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -802,23 +802,50 @@ class VideoRedirect < Exception end end -def parse_related(r : JSON::Any) : JSON::Any? - # TODO: r["endScreenPlaylistRenderer"], etc. - return if !r["endScreenVideoRenderer"]? - r = r["endScreenVideoRenderer"].as_h +# Use to parse both "compactVideoRenderer" and "endScreenVideoRenderer". +# The former is preferred as it has more videos in it. The second has +# the same 11 first entries as the compact rendered. +# +# TODO: "compactRadioRenderer" (Mix) and +def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)? + return nil if !related["videoId"]? - return if !r["lengthInSeconds"]? + # The compact renderer has video length in seconds, where the end + # screen rendered has a full text version ("42:40") + length = related["lengthInSeconds"]?.try &.as_i.to_s + length ||= related.dig?("lengthText", "simpleText").try do |box| + decode_length_seconds(box.as_s).to_s + end - rv = {} of String => JSON::Any - rv["author"] = r["shortBylineText"]["runs"][0]?.try &.["text"] || JSON::Any.new("") - rv["ucid"] = r["shortBylineText"]["runs"][0]?.try &.["navigationEndpoint"]["browseEndpoint"]["browseId"] || JSON::Any.new("") - rv["author_url"] = JSON::Any.new("/channel/#{rv["ucid"]}") - rv["length_seconds"] = JSON::Any.new(r["lengthInSeconds"].as_i.to_s) - rv["title"] = r["title"]["simpleText"] - rv["short_view_count_text"] = JSON::Any.new(r["shortViewCountText"]?.try &.["simpleText"]?.try &.as_s || "") - rv["view_count"] = JSON::Any.new(r["title"]["accessibility"]?.try &.["accessibilityData"]["label"].as_s.match(/(?[1-9](\d+,?)*) views/).try &.["views"].gsub(/\D/, "") || "") - rv["id"] = r["videoId"] - JSON::Any.new(rv) + # Both have "short", so the "long" option shouldn't be required + channel_info = (related["shortBylineText"]? || related["longBylineText"]?) + .try &.dig?("runs", 0) + + author = channel_info.try &.dig?("text") + ucid = channel_info.try { |ci| HelperExtractors.get_browse_id(ci) } + + # "4,088,033 views", only available on compact renderer + # and when video is not a livestream + view_count = related.dig?("viewCountText", "simpleText") + .try &.as_s.gsub(/\D/, "") + + short_view_count = related.try do |r| + HelperExtractors.get_short_view_count(r).to_s + end + + LOGGER.trace("parse_related_video: Found \"watchNextEndScreenRenderer\" container") + + # TODO: when refactoring video types, make a struct for related videos + # or reuse an existing type, if that fits. + return { + "id" => related["videoId"], + "title" => related["title"]["simpleText"], + "author" => author || JSON::Any.new(""), + "ucid" => JSON::Any.new(ucid || ""), + "length_seconds" => JSON::Any.new(length || "0"), + "view_count" => JSON::Any.new(view_count || "0"), + "short_view_count" => JSON::Any.new(short_view_count || "0"), + } end def extract_video_info(video_id : String, proxy_region : String? = nil, context_screen : String? = nil) @@ -871,18 +898,6 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_ params[f] = player_response[f] if player_response[f]? end - params["relatedVideos"] = ( - player_response - .dig?("playerOverlays", "playerOverlayRenderer", "endScreen", "watchNextEndScreenRenderer", "results") - .try &.as_a.compact_map { |r| parse_related r } || \ - player_response - .dig?("webWatchNextResponseExtensionData", "relatedVideoArgs") - .try &.as_s.split(",").map { |r| - r = HTTP::Params.parse(r).to_h - JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) })) - } - ).try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any) - # Top level elements main_results = player_response.dig?("contents", "twoColumnWatchNextResults") @@ -907,6 +922,38 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_ raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer + # Related videos + + LOGGER.debug("extract_video_info: parsing related videos...") + + related = [] of JSON::Any + + # Parse "compactVideoRenderer" items (under secondary results) + secondary_results.as_a.each do |element| + if item = element["compactVideoRenderer"]? + related_video = parse_related_video(item) + related << JSON::Any.new(related_video) if related_video + end + end + + # If nothing was found previously, fall back to end screen renderer + if related.empty? + # Container for "endScreenVideoRenderer" items + player_overlays = player_response.dig?( + "playerOverlays", "playerOverlayRenderer", + "endScreen", "watchNextEndScreenRenderer", "results" + ) + + secondary_results.try &.as_a.each do |element| + if item = element["endScreenVideoRenderer"]? + related_video = parse_related_video(item) + related << JSON::Any.new(related_video) if related_video + end + end + end + + params["relatedVideos"] = JSON::Any.new(related) + # Likes/dislikes toplevel_buttons = video_primary_renderer From 1ec15dc0736c193330ae9e1eeaff2ae3a5f9e890 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 01:44:11 +0100 Subject: [PATCH 7/9] Propagate related videos changes to API function --- src/invidious/videos.cr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index d8289506..74e2746c 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -446,7 +446,7 @@ struct Video end json.field "author", rv["author"] - json.field "authorUrl", rv["author_url"]? + json.field "authorUrl", "/channel/#{rv["ucid"]?}" json.field "authorId", rv["ucid"]? if rv["author_thumbnail"]? json.field "authorThumbnails" do @@ -455,7 +455,7 @@ struct Video qualities.each do |quality| json.object do - json.field "url", rv["author_thumbnail"]?.try &.gsub(/s\d+-/, "s#{quality}-") + json.field "url", rv["author_thumbnail"].try &.gsub(/s\d+-/, "s#{quality}-") json.field "width", quality json.field "height", quality end @@ -465,7 +465,7 @@ struct Video end json.field "lengthSeconds", rv["length_seconds"]?.try &.to_i - json.field "viewCountText", rv["short_view_count_text"]? + json.field "viewCountText", rv["short_view_count"]? json.field "viewCount", rv["view_count"]?.try &.empty? ? nil : rv["view_count"].to_i64 end end From eca8d2e7d72d142c7509f2f6cfb8f96a915bb77d Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 03:55:43 +0100 Subject: [PATCH 8/9] Apply suggestions from code review Co-authored-by: Matthew McGarvey --- src/invidious/videos.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 74e2746c..446e8e03 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -455,7 +455,7 @@ struct Video qualities.each do |quality| json.object do - json.field "url", rv["author_thumbnail"].try &.gsub(/s\d+-/, "s#{quality}-") + json.field "url", rv["author_thumbnail"].gsub(/s\d+-/, "s#{quality}-") json.field "width", quality json.field "height", quality end @@ -944,7 +944,7 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_ "endScreen", "watchNextEndScreenRenderer", "results" ) - secondary_results.try &.as_a.each do |element| + player_overlays.try &.as_a.each do |element| if item = element["endScreenVideoRenderer"]? related_video = parse_related_video(item) related << JSON::Any.new(related_video) if related_video From ba37259258277aafb6fc700dabecead695bd624e Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 3 Feb 2022 04:24:31 +0100 Subject: [PATCH 9/9] Also propagate changes to watch ECR page --- src/invidious/views/watch.ecr | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/invidious/views/watch.ecr b/src/invidious/views/watch.ecr index 00f5f8b7..2e0aee99 100644 --- a/src/invidious/views/watch.ecr +++ b/src/invidious/views/watch.ecr @@ -321,11 +321,11 @@ we're going to need to do it here in order to allow for translations.
- <% if views = rv["short_view_count_text"]?.try &.delete(", views watching") %> - <% if !views.empty? %> - <%= translate_count(locale, "generic_views_count", views.to_i? || 0) %> - <% end %> - <% end %> + <%= + views = rv["view_count"]?.try &.to_i? + views ||= rv["view_count_short"]?.try { |x| short_text_to_number(x) } + translate_count(locale, "generic_views_count", views || 0, NumberFormatting::Short) + %>