From 64cfd2296c7be46888228a22bbd6dbb320e65798 Mon Sep 17 00:00:00 2001
From: Omar Roth
Date: Sun, 3 Mar 2019 19:18:23 -0600
Subject: [PATCH] Add support for subscribing to channels via PubSubHubbub

---
Review notes (text between "---" and the diffstat is not part of the commit message):
  * channels rows are looked up by `id`; the earlier revision of this patch
    queried a `ucid` column that the InvidiousChannel mapping does not define.
  * subscribe_to_feeds rescues inside its loop so that one failed request
    does not end the background job.
  * Indentation and alignment of context/removed lines were reconstructed
    from a whitespace-collapsed copy of the patch. Apply with
    `git apply --ignore-whitespace` and review the result
    (TODO: confirm against the target tree).
  * The commit id on the first line and the `index` blob ids are those of the
    original revision of the patch.

 config/migrate-scripts/migrate-db-17cf077.sh |  6 ++
 src/invidious.cr                             | 65 +++++++++++++++++++-
 src/invidious/channels.cr                    | 37 +++++++++---
 src/invidious/helpers/helpers.cr             |  9 ++--
 src/invidious/jobs.cr                        | 29 +++++++++
 5 files changed, 132 insertions(+), 14 deletions(-)
 create mode 100755 config/migrate-scripts/migrate-db-17cf077.sh

diff --git a/config/migrate-scripts/migrate-db-17cf077.sh b/config/migrate-scripts/migrate-db-17cf077.sh
new file mode 100755
index 00000000..a544f763
--- /dev/null
+++ b/config/migrate-scripts/migrate-db-17cf077.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Add the `subscribed` flag to channels and mark every existing row as not
+# subscribed, which is the condition subscribe_to_feeds selects on.
+psql invidious -c "ALTER TABLE channels ADD COLUMN subscribed bool;"
+psql invidious -c "UPDATE channels SET subscribed = false;"
diff --git a/src/invidious.cr b/src/invidious.cr
index 88f258c6..46009533 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -28,7 +28,7 @@ require "./invidious/helpers/*"
 require "./invidious/*"
 
 CONFIG   = Config.from_yaml(File.read("config/config.yml"))
-HMAC_KEY = CONFIG.hmac_key || Random::Secure.random_bytes(32)
+HMAC_KEY = CONFIG.hmac_key || Random::Secure.hex(32)
 
 config = CONFIG
 logger = Invidious::LogHandler.new
@@ -88,6 +88,7 @@ PG_DB = DB.open PG_URL
 YT_URL          = URI.parse("https://www.youtube.com")
 REDDIT_URL      = URI.parse("https://www.reddit.com")
 LOGIN_URL       = URI.parse("https://accounts.google.com")
+PUBSUB_URL      = URI.parse("https://pubsubhubbub.appspot.com")
 TEXTCAPTCHA_URL = URI.parse("http://textcaptcha.com/omarroth@hotmail.com.json")
 CURRENT_COMMIT  = `git rev-list HEAD --max-count=1 --abbrev-commit`.strip
 CURRENT_VERSION = `git describe --tags $(git rev-list --tags --max-count=1)`.strip
@@ -115,6 +116,8 @@ refresh_channels(PG_DB, logger, config.channel_threads, config.full_refresh)
 
 refresh_feeds(PG_DB, logger, config.feed_threads)
 
+subscribe_to_feeds(PG_DB, logger, HMAC_KEY, config)
+
 config.video_threads.times do |i|
   spawn do
     refresh_videos(PG_DB, logger)
@@ -2314,6 +2317,66 @@ get "/feed/playlist/:plid" do |env|
   document
 end
 
+# Add support for subscribing to channels via PubSubHubbub
+
+# Verification callback: validates the verify_token issued by subscribe_pubsub,
+# marks the channel as subscribed and answers with the hub's challenge.
+get "/feed/webhook" do |env|
+  mode = env.params.query["hub.mode"]
+  topic = env.params.query["hub.topic"]
+  challenge = env.params.query["hub.challenge"]
+  lease_seconds = env.params.query["hub.lease_seconds"]
+  verify_token = env.params.query["hub.verify_token"]
+
+  # verify_token is "<unix time>:<hex HMAC-SHA1 of that time, keyed with HMAC_KEY>"
+  time, signature = verify_token.split(":")
+
+  # Reject tokens issued more than 10 minutes ago
+  if Time.now.to_unix - time.to_i > 600
+    halt env, status_code: 400
+  end
+
+  if OpenSSL::HMAC.hexdigest(:sha1, HMAC_KEY, time) != signature
+    halt env, status_code: 400
+  end
+
+  ucid = HTTP::Params.parse(URI.parse(topic).query.not_nil!)["channel_id"]
+  # channels is keyed by `id` (see InvidiousChannel); the mapping has no `ucid` field
+  PG_DB.exec("UPDATE channels SET subscribed = true WHERE id = $1", ucid)
+
+  halt env, status_code: 200, response: challenge
+end
+
+# Notification callback: the request body must be signed (X-Hub-Signature)
+# with HMAC_KEY, which subscribe_pubsub hands to the hub as hub.secret.
+post "/feed/webhook" do |env|
+  body = env.request.body.not_nil!.gets_to_end
+  signature = env.request.headers["X-Hub-Signature"].lchop("sha1=")
+
+  # A payload with a bad signature is answered with 200 but not processed
+  if signature != OpenSSL::HMAC.hexdigest(:sha1, HMAC_KEY, body)
+    halt env, status_code: 200
+  end
+
+  rss = XML.parse_html(body)
+  rss.xpath_nodes("//feed/entry").each do |entry|
+    id = entry.xpath_node("videoid").not_nil!.content
+
+    video = get_video(id, PG_DB, proxies)
+    video = ChannelVideo.new(id, video.title, video.published, Time.now, video.ucid, video.author, video.length_seconds)
+
+    PG_DB.exec("UPDATE users SET notifications = notifications || $1 \
+      WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, video.ucid)
+
+    video_array = video.to_a
+    args = arg_array(video_array)
+
+    PG_DB.exec("INSERT INTO channel_videos VALUES (#{args}) \
+      ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
+      updated = $4, ucid = $5, author = $6, length_seconds = $7", video_array)
+  end
+end
+
 # Channels
 
 # YouTube appears to let users set a "brand" URL that
diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr
index 25a7ce77..097aa764 100644
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@@ -1,9 +1,10 @@
 class InvidiousChannel
   add_mapping({
-    id:      String,
-    author:  String,
-    updated: Time,
-    deleted: Bool,
+    id:         String,
+    author:     String,
+    updated:    Time,
+    deleted:    Bool,
+    subscribed: {type: Bool, default: false},
   })
 end
 
@@ -15,10 +16,7 @@ class ChannelVideo
     updated:        Time,
     ucid:           String,
     author:         String,
-    length_seconds: {
-      type:    Int32,
-      default: 0,
-    },
+    length_seconds: {type: Int32, default: 0},
   })
 end
 
@@ -188,11 +186,32 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
     db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{ids.map { |id| %("#{id}") }.join(",")}}') AND ucid = $1", ucid)
   end
 
-  channel = InvidiousChannel.new(ucid, author, Time.now, false)
+  channel = InvidiousChannel.new(ucid, author, Time.now, false, false)
 
   return channel
 end
 
+# Posts a subscribe request for the video feed of channel `ucid` to the hub at
+# PUBSUB_URL, with this instance's /feed/webhook as the callback. `key` signs
+# the verify_token and is sent as hub.secret. Returns the hub's HTTP response.
+def subscribe_pubsub(ucid, key, config)
+  client = make_client(PUBSUB_URL)
+  time = Time.now.to_unix.to_s
+
+  host_url = make_host_url(Kemal.config.ssl || config.https_only, config.domain)
+
+  body = {
+    "hub.callback"     => "#{host_url}/feed/webhook",
+    "hub.topic"        => "https://www.youtube.com/feeds/videos.xml?channel_id=#{ucid}",
+    "hub.verify"       => "async",
+    "hub.mode"         => "subscribe",
+    "hub.verify_token" => "#{time}:#{OpenSSL::HMAC.hexdigest(:sha1, key, time)}",
+    "hub.secret"       => key.to_s,
+  }
+
+  return client.post("/subscribe", form: body)
+end
+
 def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
   client = make_client(YT_URL)
 
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index 37d88ef8..3574e5cc 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -11,10 +11,11 @@ user: String,
 port: Int32,
 dbname: String,
 ),
-    full_refresh:     Bool,    # Used for crawling channels: threads should check all videos uploaded by a channel
-    https_only:       Bool?,   # Used to tell Invidious it is behind a proxy, so links to resources should be https://
-    hmac_key:         String?, # HMAC signing key for CSRF tokens
-    domain:           String?, # Domain to be used for links to resources on the site where an absolute URL is required
+    full_refresh:     Bool,                         # Used for crawling channels: threads should check all videos uploaded by a channel
+    https_only:       Bool?,                        # Used to tell Invidious it is behind a proxy, so links to resources should be https://
+    hmac_key:         String?,                      # HMAC signing key for CSRF tokens and verifying pubsub subscriptions
+    domain:           String?,                      # Domain to be used for links to resources on the site where an absolute URL is required
+    use_pubsub_feeds: {type: Bool, default: false}, # Subscribe to channels using PubSubHubbub (requires domain, hmac_key)
     default_home:     {type: String, default: "Top"},
     feed_menu:        {type: Array(String), default: ["Popular", "Top", "Trending"]},
     top_enabled:      {type: Bool, default: true},
diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr
index d9bd78d7..ccc5a628 100644
--- a/src/invidious/jobs.cr
+++ b/src/invidious/jobs.cr
@@ -153,6 +153,35 @@ def refresh_feeds(db, logger, max_threads = 1)
   max_channel.send(max_threads)
 end
 
+# Background job: once a minute, calls subscribe_pubsub for every channel whose
+# `subscribed` flag is false and logs responses with status >= 400.
+# Does nothing unless config.use_pubsub_feeds is set.
+def subscribe_to_feeds(db, logger, key, config)
+  if config.use_pubsub_feeds
+    spawn do
+      loop do
+        begin
+          # channels is keyed by `id` (see InvidiousChannel); the mapping has no `ucid` field
+          db.query_all("SELECT id FROM channels WHERE subscribed = false") do |rs|
+            ucid = rs.read(String)
+            response = subscribe_pubsub(ucid, key, config)
+
+            if response.status_code >= 400
+              logger.write("#{ucid} : #{response.body}\n")
+            end
+          end
+        rescue ex
+          # Keep the fiber alive; channels still flagged false are retried on the next pass
+          logger.write("#{ex.message}\n")
+        end
+
+        sleep 1.minute
+        Fiber.yield
+      end
+    end
+  end
+end
+
 def pull_top_videos(config, db)
   loop do
     begin