Add overlapping group caching

This commit is contained in:
Ajay 2021-12-20 22:22:45 -05:00
parent 7aa9524835
commit 5ebb638925
7 changed files with 88 additions and 42 deletions

14
package-lock.json generated
View file

@ -16,6 +16,7 @@
"express": "^4.17.1",
"express-promise-router": "^4.1.1",
"express-rate-limit": "^5.5.1",
"lodash": "^4.17.21",
"pg": "^8.7.1",
"redis": "^3.1.2",
"sync-mysql": "^3.0.1"
@ -25,6 +26,7 @@
"@types/cron": "^1.7.3",
"@types/express": "^4.17.13",
"@types/express-rate-limit": "^5.1.3",
"@types/lodash": "^4.14.178",
"@types/mocha": "^9.0.0",
"@types/node": "^16.11.11",
"@types/pg": "^8.6.1",
@ -317,6 +319,12 @@
"integrity": "sha512-qcUXuemtEu+E5wZSJHNxUXeCZhAfXKQ41D+duX+VYPde7xyEVZci+/oXKJL13tnRs9lR2pr4fod59GT6/X1/yQ==",
"dev": true
},
"node_modules/@types/lodash": {
"version": "4.14.178",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.178.tgz",
"integrity": "sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw==",
"dev": true
},
"node_modules/@types/mime": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz",
@ -5337,6 +5345,12 @@
"integrity": "sha512-qcUXuemtEu+E5wZSJHNxUXeCZhAfXKQ41D+duX+VYPde7xyEVZci+/oXKJL13tnRs9lR2pr4fod59GT6/X1/yQ==",
"dev": true
},
"@types/lodash": {
"version": "4.14.178",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.178.tgz",
"integrity": "sha512-0d5Wd09ItQWH1qFbEyQ7oTQ3GZrMfth5JkbN3EvTKLXcHLRDSXeLnlvlOn0wvxVIwK5o2M8JzP/OWz7T3NRsbw==",
"dev": true
},
"@types/mime": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.2.tgz",

View file

@ -24,6 +24,7 @@
"express": "^4.17.1",
"express-promise-router": "^4.1.1",
"express-rate-limit": "^5.5.1",
"lodash": "^4.17.21",
"pg": "^8.7.1",
"redis": "^3.1.2",
"sync-mysql": "^3.0.1"
@ -33,6 +34,7 @@
"@types/cron": "^1.7.3",
"@types/express": "^4.17.13",
"@types/express-rate-limit": "^5.1.3",
"@types/lodash": "^4.14.178",
"@types/mocha": "^9.0.0",
"@types/node": "^16.11.11",
"@types/pg": "^8.6.1",

View file

@ -1,7 +1,8 @@
import { Request, Response } from "express";
import { partition } from "lodash"
import { config } from "../config";
import { db, privateDB } from "../databases/databases";
import { skipSegmentsHashKey, skipSegmentsKey } from "../utils/redisKeys";
import { skipSegmentsHashKey, skipSegmentsKey, skipSegmentGroupsKey } from "../utils/redisKeys";
import { SBRecord } from "../types/lib.model";
import { ActionType, Category, CategoryActionType, DBSegment, HashedIP, IPAddress, OverlappingSegmentGroup, Segment, SegmentCache, SegmentUUID, Service, VideoData, VideoID, VideoIDHash, Visibility, VotableObject } from "../types/segments.model";
import { getCategoryActionType } from "../utils/categoryInfo";
@ -13,7 +14,7 @@ import { getReputation } from "../utils/reputation";
import { getService } from "../utils/getService";
async function prepareCategorySegments(req: Request, videoID: VideoID, category: Category, segments: DBSegment[], cache: SegmentCache = { shadowHiddenSegmentIPs: {} }): Promise<Segment[]> {
async function prepareCategorySegments(req: Request, videoID: VideoID, service: Service, segments: DBSegment[], cache: SegmentCache = { shadowHiddenSegmentIPs: {} }, useCache: boolean): Promise<Segment[]> {
const shouldFilter: boolean[] = await Promise.all(segments.map(async (segment) => {
if (segment.votes < -1 && !segment.required) {
return false; //too untrustworthy, just ignore it
@ -39,14 +40,16 @@ async function prepareCategorySegments(req: Request, videoID: VideoID, category:
cache.userHashedIP = await getHashCache((getIP(req) + config.globalSalt) as IPAddress);
}
//if this isn't their ip, don't send it to them
return cache.shadowHiddenSegmentIPs[videoID][segment.timeSubmitted]?.some(
const shouldShadowHide = cache.shadowHiddenSegmentIPs[videoID][segment.timeSubmitted]?.some(
(shadowHiddenSegment) => shadowHiddenSegment.hashedIP === cache.userHashedIP) ?? false;
if (shouldShadowHide) useCache = false;
return shouldShadowHide;
}));
const filteredSegments = segments.filter((_, index) => shouldFilter[index]);
const maxSegments = getCategoryActionType(category) === CategoryActionType.Skippable ? Infinity : 1;
return (await chooseSegments(filteredSegments, maxSegments)).map((chosenSegment) => ({
return (await chooseSegments(videoID, service, filteredSegments, useCache)).map((chosenSegment) => ({
category: chosenSegment.category,
actionType: chosenSegment.actionType,
segment: [chosenSegment.startTime, chosenSegment.endTime],
@ -62,28 +65,21 @@ async function prepareCategorySegments(req: Request, videoID: VideoID, category:
async function getSegmentsByVideoID(req: Request, videoID: VideoID, categories: Category[],
actionTypes: ActionType[], requiredSegments: SegmentUUID[], service: Service): Promise<Segment[]> {
const cache: SegmentCache = { shadowHiddenSegmentIPs: {} };
const segments: Segment[] = [];
try {
categories = categories.filter((category) => !/[^a-z|_|-]/.test(category));
if (categories.length === 0) return null;
const segmentsByCategory: SBRecord<Category, DBSegment[]> = (await getSegmentsFromDBByVideoID(videoID, service))
.filter((segment: DBSegment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType))
.reduce((acc: SBRecord<Category, DBSegment[]>, segment: DBSegment) => {
const segments: DBSegment[] = (await getSegmentsFromDBByVideoID(videoID, service))
.map((segment: DBSegment) => {
if (filterRequiredSegments(segment.UUID, requiredSegments)) segment.required = true;
acc[segment.category] ??= [];
acc[segment.category].push(segment);
return acc;
return segment;
}, {});
for (const [category, categorySegments] of Object.entries(segmentsByCategory)) {
segments.push(...(await prepareCategorySegments(req, videoID, category as Category, categorySegments, cache)));
}
const canUseCache = requiredSegments.length === 0;
const processedSegments: Segment[] = await prepareCategorySegments(req, videoID, service, segments, cache, canUseCache);
return segments;
return processedSegments.filter((segment: Segment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType));
} catch (err) {
if (err) {
Logger.error(err as string);
@ -98,34 +94,37 @@ async function getSegmentsByHash(req: Request, hashedVideoIDPrefix: VideoIDHash,
const segments: SBRecord<VideoID, VideoData> = {};
try {
type SegmentWithHashPerVideoID = SBRecord<VideoID, {hash: VideoIDHash, segmentPerCategory: SBRecord<Category, DBSegment[]>}>;
type SegmentWithHashPerVideoID = SBRecord<VideoID, { hash: VideoIDHash, segments: DBSegment[] }>;
categories = categories.filter((category) => !(/[^a-z|_|-]/.test(category)));
if (categories.length === 0) return null;
const segmentPerVideoID: SegmentWithHashPerVideoID = (await getSegmentsFromDBByHash(hashedVideoIDPrefix, service))
.filter((segment: DBSegment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType))
.reduce((acc: SegmentWithHashPerVideoID, segment: DBSegment) => {
acc[segment.videoID] = acc[segment.videoID] || {
hash: segment.hashedVideoID,
segmentPerCategory: {}
segments: []
};
if (filterRequiredSegments(segment.UUID, requiredSegments)) segment.required = true;
acc[segment.videoID].segmentPerCategory[segment.category] ??= [];
acc[segment.videoID].segmentPerCategory[segment.category].push(segment);
acc[segment.videoID].segments ??= [];
acc[segment.videoID].segments.push(segment);
return acc;
}, {});
for (const [videoID, videoData] of Object.entries(segmentPerVideoID)) {
segments[videoID] = {
const data: VideoData = {
hash: videoData.hash,
segments: [],
};
for (const [category, segmentPerCategory] of Object.entries(videoData.segmentPerCategory)) {
segments[videoID].segments.push(...(await prepareCategorySegments(req, videoID as VideoID, category as Category, segmentPerCategory, cache)));
const canUseCache = requiredSegments.length === 0;
data.segments = (await prepareCategorySegments(req, videoID as VideoID, service, videoData.segments, cache, canUseCache))
.filter((segment: Segment) => categories.includes(segment?.category) && actionTypes.includes(segment?.actionType));
if (data.segments.length > 0) {
segments[videoID] = data;
}
}
@ -164,10 +163,11 @@ async function getSegmentsFromDBByVideoID(videoID: VideoID, service: Service): P
return await QueryCacher.get(fetchFromDB, skipSegmentsKey(videoID, service));
}
//gets a weighted random choice from the choices array based on their `votes` property.
//amountOfChoices specifies the maximum amount of choices to return, 1 or more.
//choices are unique
function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number): T[] {
// Gets a weighted random choice from the choices array based on their `votes` property.
// amountOfChoices specifies the maximum amount of choices to return, 1 or more.
// Choices are unique
// If a predicate is given, it will only filter choices following it, and will leave the rest in the list
function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOfChoices: number, predicate?: (choice: T) => void): T[] {
//trivial case: no need to go through the whole process
if (amountOfChoices >= choices.length) {
return choices;
@ -179,7 +179,7 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf
//assign a weight to each choice
let totalWeight = 0;
const choicesWithWeights: TWithWeight[] = choices.map(choice => {
let choicesWithWeights: TWithWeight[] = choices.map(choice => {
const boost = Math.min(choice.reputation, 4);
//The 3 makes -2 the minimum votes before being ignored completely
@ -190,8 +190,20 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf
return { ...choice, weight };
});
let forceIncludedChoices: T[] = [];
if (predicate) {
const splitArray = partition(choicesWithWeights, predicate);
choicesWithWeights = splitArray[0];
forceIncludedChoices = splitArray[1];
}
// Nothing to filter for
if (amountOfChoices >= choicesWithWeights.length) {
return choices;
}
//iterate and find amountOfChoices choices
const chosen = [];
const chosen = [...forceIncludedChoices];
while (amountOfChoices-- > 0) {
//weighted random draw of one element of choices
const randomNumber = Math.random() * totalWeight;
@ -210,11 +222,25 @@ function getWeightedRandomChoice<T extends VotableObject>(choices: T[], amountOf
return chosen;
}
async function chooseSegments(videoID: VideoID, service: Service, segments: DBSegment[], useCache: boolean): Promise<DBSegment[]> {
const fetchData = async () => await buildSegmentGroups(segments);
const groups = useCache
? await QueryCacher.get(fetchData, skipSegmentGroupsKey(videoID, service))
: await fetchData();
// Filter for only 1 item for POI categories
return getWeightedRandomChoice(groups, 1, (choice) => getCategoryActionType(choice.segments[0].category) === CategoryActionType.POI)
.map(//randomly choose 1 good segment per group and return them
group => getWeightedRandomChoice(group.segments, 1)[0]
);
}
//This function will find segments that are contained inside of eachother, called similar segments
//Only one similar time will be returned, randomly generated based on the sqrt of votes.
//This allows new less voted items to still sometimes appear to give them a chance at getting votes.
//Segments with less than -1 votes are already ignored before this function is called
async function chooseSegments(segments: DBSegment[], max: number): Promise<DBSegment[]> {
async function buildSegmentGroups(segments: DBSegment[]): Promise<OverlappingSegmentGroup[]> {
//Create groups of segments that are similar to eachother
//Segments must be sorted by their startTime so that we can build groups chronologically:
//1. As long as the segments' startTime fall inside the currentGroup, we keep adding them to that group
@ -265,10 +291,7 @@ async function chooseSegments(segments: DBSegment[], max: number): Promise<DBSeg
overlappingSegmentsGroups = splitPercentOverlap(overlappingSegmentsGroups);
//if there are too many groups, find the best ones
return getWeightedRandomChoice(overlappingSegmentsGroups, max).map(
//randomly choose 1 good segment per group and return them
group => getWeightedRandomChoice(group.segments, 1)[0],
);
return overlappingSegmentsGroups;
}
function splitPercentOverlap(groups: OverlappingSegmentGroup[]): OverlappingSegmentGroup[] {
@ -277,12 +300,14 @@ function splitPercentOverlap(groups: OverlappingSegmentGroup[]): OverlappingSegm
group.segments.forEach((segment) => {
const bestGroup = result.find((group) => {
// At least one segment in the group must have high % overlap or the same action type
// Since POI segments will always have 0 overlap, they will always be in their own groups
return group.segments.some((compareSegment) => {
const overlap = Math.min(segment.endTime, compareSegment.endTime) - Math.max(segment.startTime, compareSegment.startTime);
const overallDuration = Math.max(segment.endTime, compareSegment.endTime) - Math.min(segment.startTime, compareSegment.startTime);
const overlapPercent = overlap / overallDuration;
return (overlapPercent > 0 && segment.actionType === compareSegment.actionType && segment.actionType !== ActionType.Chapter)
|| overlapPercent >= 0.6
return (overlapPercent > 0 && segment.actionType === compareSegment.actionType && segment.category == compareSegment.category && segment.actionType !== ActionType.Chapter)
|| (overlapPercent >= 0.6 && segment.actionType !== compareSegment.actionType && segment.category === compareSegment.category)
|| (overlapPercent >= 0.8 && segment.actionType === compareSegment.actionType && segment.category !== compareSegment.category)
|| (overlapPercent >= 0.8 && segment.actionType === ActionType.Chapter && compareSegment.actionType === ActionType.Chapter);
});
});

View file

@ -1,6 +1,6 @@
import redis from "../utils/redis";
import { Logger } from "../utils/logger";
import { skipSegmentsHashKey, skipSegmentsKey, reputationKey, ratingHashKey } from "./redisKeys";
import { skipSegmentsHashKey, skipSegmentsKey, reputationKey, ratingHashKey, skipSegmentGroupsKey } from "./redisKeys";
import { Service, VideoID, VideoIDHash } from "../types/segments.model";
import { UserID } from "../types/user.model";
@ -82,6 +82,7 @@ async function getAndSplit<T, U extends string>(fetchFromDB: (values: U[]) => Pr
function clearSegmentCache(videoInfo: { videoID: VideoID; hashedVideoID: VideoIDHash; service: Service; userID?: UserID; }): void {
if (videoInfo) {
redis.delAsync(skipSegmentsKey(videoInfo.videoID, videoInfo.service));
redis.delAsync(skipSegmentGroupsKey(videoInfo.videoID, videoInfo.service));
redis.delAsync(skipSegmentsHashKey(videoInfo.hashedVideoID, videoInfo.service));
if (videoInfo.userID) redis.delAsync(reputationKey(videoInfo.userID));
}

View file

@ -7,6 +7,10 @@ export function skipSegmentsKey(videoID: VideoID, service: Service): string {
return `segments.v2.${service}.videoID.${videoID}`;
}
export function skipSegmentGroupsKey(videoID: VideoID, service: Service): string {
return `segments.groups.${service}.videoID.${videoID}`;
}
export function skipSegmentsHashKey(hashedVideoIDPrefix: VideoIDHash, service: Service): string {
hashedVideoIDPrefix = hashedVideoIDPrefix.substring(0, 4) as VideoIDHash;
if (hashedVideoIDPrefix.length !== 4) Logger.warn(`Redis skip segment hash-prefix key is not length 4! ${hashedVideoIDPrefix}`);

View file

@ -25,7 +25,7 @@ describe("getSkipSegments", () => {
await db.prepare("run", query, ["requiredSegmentVid", 80, 90, 2, 0, "requiredSegmentVid4", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
await db.prepare("run", query, ["chapterVid", 60, 80, 2, 0, "chapterVid-1", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 1"]);
await db.prepare("run", query, ["chapterVid", 70, 75, 2, 0, "chapterVid-2", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 2"]);
await db.prepare("run", query, ["chapterVid", 71, 76, 2, 0, "chapterVid-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 3"]);
await db.prepare("run", query, ["chapterVid", 71, 75, 2, 0, "chapterVid-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, 0, "Chapter 3"]);
await db.prepare("run", query, ["requiredSegmentHashVid", 10, 20, -2, 0, "1d04b98f48e8f8bcc15c6ae5ac050801cd6dcfd428fb5f9e65c4e16e7807340fa", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
await db.prepare("run", query, ["requiredSegmentHashVid", 20, 30, -2, 0, "1ebde8e8ae03096b6c866aa2c8cc7ee1d720ca1fca27bea3f39a6a1b876577e71", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, 0, ""]);
return;

View file

@ -33,7 +33,7 @@ describe("getSkipSegmentsByHash", () => {
await db.prepare("run", query, ["requiredSegmentVid", 80, 90, 2, "requiredSegmentVid-4", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, requiredSegmentVidHash, ""]);
await db.prepare("run", query, ["chapterVid-hash", 60, 80, 2, "chapterVid-hash-1", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 1"]); //7258
await db.prepare("run", query, ["chapterVid-hash", 70, 75, 2, "chapterVid-hash-2", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 2"]); //7258
await db.prepare("run", query, ["chapterVid-hash", 71, 76, 2, "chapterVid-hash-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 3"]); //7258
await db.prepare("run", query, ["chapterVid-hash", 71, 75, 2, "chapterVid-hash-3", "testman", 0, 50, "chapter", "chapter", "YouTube", 0, 0, getHash("chapterVid-hash", 1), "Chapter 3"]); //7258
await db.prepare("run", query, ["longMuteVid-hash", 40, 45, 2, "longMuteVid-hash-1", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613
await db.prepare("run", query, ["longMuteVid-hash", 30, 35, 2, "longMuteVid-hash-2", "testman", 0, 50, "sponsor", "skip", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613
await db.prepare("run", query, ["longMuteVid-hash", 2, 80, 2, "longMuteVid-hash-3", "testman", 0, 50, "sponsor", "mute", "YouTube", 0, 0, getHash("longMuteVid-hash", 1), ""]); //6613