Merge pull request #217 from MRuy/feature/configurable-database-dump

This commit is contained in:
Ajay Ramachandran 2021-04-17 23:07:08 -04:00 committed by GitHub
commit 5eacfff7ae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 194 additions and 33 deletions

View file

@ -38,5 +38,31 @@
"max": 20, // 20 requests in 15min time window
"statusCode": 200
}
},
"dumpDatabase": {
"enabled": true,
"minTimeBetweenMs": 60000, // 1 minute between dumps
"appExportPath": "/opt/exports",
"postgresExportPath": "/opt/exports",
"tables": [{
"name": "sponsorTimes",
"order": "timeSubmitted"
},
{
"name": "userNames"
},
{
"name": "categoryVotes"
},
{
"name": "noSegments"
},
{
"name": "warnings",
"order": "issueTime"
},
{
"name": "vipUsers"
}]
}
}

View file

@ -45,7 +45,33 @@ addDefaults(config, {
},
userCounterURL: null,
youtubeAPIKey: null,
postgres: null
postgres: null,
dumpDatabase: {
enabled: true,
minTimeBetweenMs: 60000,
appExportPath: './docker/database-export',
postgresExportPath: '/opt/exports',
tables: [{
name: "sponsorTimes",
order: "timeSubmitted"
},
{
name: "userNames"
},
{
name: "categoryVotes"
},
{
name: "noSegments",
},
{
name: "warnings",
order: "issueTime"
},
{
name: "vipUsers"
}]
}
});
// Add defaults

View file

@ -2,51 +2,110 @@ import {db} from '../databases/databases';
import {Logger} from '../utils/logger';
import {Request, Response} from 'express';
import { config } from '../config';
import util from 'util';
import fs from 'fs';
import path from 'path';
const unlink = util.promisify(fs.unlink);
const ONE_MINUTE = 1000 * 60;
const styleHeader = `<style>body{font-family: sans-serif}</style>`
const styleHeader = `<style>
body {
font-family: sans-serif
}
table th,
table td {
padding: 7px;
}
table th {
text-align: left;
}
table tbody tr:nth-child(odd) {
background: #efefef;
}
</style>`
// License notice rendered at the top of the database dump page.
// Every <p> and <a> element is closed exactly once so the surrounding page stays valid HTML.
const licenseHeader = `<p>The API and database follow <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" rel="nofollow">CC BY-NC-SA 4.0</a> unless you have explicit permission.</p>
<p><a href="https://gist.github.com/ajayyy/4b27dfc66e33941a45aeaadccb51de71">Attribution Template</a></p>
<p>If you need to use the database or API in a way that violates this license, contact me with your reason and I may grant you access under a different license.</p>`;
const tables = [{
name: "sponsorTimes",
order: "timeSubmitted"
},
{
name: "userNames"
},
{
name: "categoryVotes"
},
{
name: "noSegments",
},
{
name: "warnings",
order: "issueTime"
},
{
name: "vipUsers"
}];
// Tables to export, read from the `dumpDatabase.tables` config entry (empty list when it is missing)
const tables = config?.dumpDatabase?.tables ?? [];
// Minimum time in milliseconds between two dumps; falls back to ONE_MINUTE when not configured
const MILLISECONDS_BETWEEN_DUMPS = config?.dumpDatabase?.minTimeBetweenMs ?? ONE_MINUTE;
// Directory that is scanned for outdated dump files before a new dump is started
const appExportPath = config?.dumpDatabase?.appExportPath ?? './docker/database-export';
// Directory used to build the target file path of the `COPY ... TO` statements
const postgresExportPath = config?.dumpDatabase?.postgresExportPath ?? '/opt/exports';
// Names of the configured tables; used to recognise dump files when removing outdated ones
const tableNames = tables.map(table => table.name);
const links: string[] = tables.map((table) => `/database/${table.name}.csv`);
/** One generated dump file, as listed on the dump page and in the JSON response. */
interface TableDumpList {
/** File name of the CSV, built as `<tableName>_<timestamp>.csv`. */
fileName: string;
/** Name of the table the CSV was exported from. */
tableName: string;
};
let latestDumpFiles: TableDumpList[] = [];
const linksHTML: string = tables.map((table) => `<p><a href="/database/${table.name}.csv">${table.name}.csv</a></p>`)
.reduce((acc, url) => acc + url, "");
if (tables.length === 0) {
Logger.warn('[dumpDatabase] No tables configured');
}
let lastUpdate = 0;
export default function dumpDatabase(req: Request, res: Response, showPage: boolean) {
/**
 * Deletes outdated CSV dumps of the configured tables from the export directory.
 *
 * Only files named `<tableName>_<timestamp>.csv` are considered, where `<tableName>` is one of
 * `tableNames` and `<timestamp>` consists of digits only. A dump is outdated once its timestamp
 * is older than 1.5 times MILLISECONDS_BETWEEN_DUMPS.
 *
 * This is best-effort garbage collection: a directory that cannot be read or a file that cannot
 * be removed is logged, but the returned promise never rejects.
 *
 * @param exportPath directory that contains the dump files
 * @returns a promise that resolves after the removal of every outdated file has been attempted
 */
function removeOutdatedDumps(exportPath: string): Promise<void> {
    return new Promise((resolve) => {
        fs.readdir(exportPath, (err: any, files: string[]) => {
            if (err) {
                Logger.error(err);
                return resolve();
            }

            const csvSuffix = '.csv';
            const outdatedTime = Math.floor(Date.now() - (MILLISECONDS_BETWEEN_DUMPS * 1.5));
            // A Set so a file is unlinked only once, even if a table is configured twice
            const outdatedFiles = new Set<string>();

            for (const file of files) {
                if (!file.endsWith(csvSuffix)) continue;

                for (const tableName of tableNames) {
                    const prefix = `${tableName}_`;
                    if (!file.startsWith(prefix)) continue;

                    // Take everything between the prefix and ".csv" instead of splitting on "_",
                    // so that table names containing underscores are handled correctly
                    const timestampText = file.slice(prefix.length, -csvSuffix.length);
                    // Anything that is not a plain timestamp is not one of our dumps: leave it alone
                    if (!/^\d+$/.test(timestampText)) continue;

                    if (Number(timestampText) < outdatedTime) {
                        outdatedFiles.add(path.join(exportPath, file));
                    }
                }
            }

            // Wait for every removal to settle before resolving; individual failures are only logged
            const removals = Array.from(outdatedFiles, (outdatedFile) =>
                unlink(outdatedFile).catch((error: any) => {
                    Logger.error(`[dumpDatabase] Garbage collection failed ${error}`);
                })
            );
            Promise.all(removals).then(() => resolve());
        });
    });
}
export default async function dumpDatabase(req: Request, res: Response, showPage: boolean) {
if (!config?.dumpDatabase?.enabled) {
res.status(404).send("Database dump is disabled");
return;
}
if (!config.postgres) {
res.status(404).send("Not supported on this instance");
return;
}
const now = Date.now();
const updateQueued = now - lastUpdate > ONE_MINUTE;
const updateQueued = now - lastUpdate > MILLISECONDS_BETWEEN_DUMPS;
res.status(200)
@ -57,22 +116,58 @@ export default function dumpDatabase(req: Request, res: Response, showPage: bool
Send a request to <code>https://sponsor.ajay.app/database.json</code>, or visit this page to trigger the database dump to run.
Then, you can download the csv files below, or use the links returned from the JSON request.
<h3>Links</h3>
${linksHTML}<br/>
<table>
<thead>
<tr>
<th>Table</th>
<th>CSV</th>
</tr>
</thead>
<tbody>
${latestDumpFiles.map((item:any) => {
return `
<tr>
<td>${item.tableName}</td>
<td><a href="/database/${item.fileName}">${item.fileName}</a></td>
</tr>
`;
}).join('')}
${latestDumpFiles.length === 0 ? '<tr><td colspan="2">Please wait: Generating files</td></tr>' : ''}
</tbody>
</table>
<hr/>
${updateQueued ? `Update queued.` : ``} Last updated: ${lastUpdate ? new Date(lastUpdate).toUTCString() : `Unknown`}`);
} else {
res.send({
lastUpdated: lastUpdate,
updateQueued,
links
links: latestDumpFiles.map((item:any) => {
return {
table: item.tableName,
url: `/database/${item.fileName}`,
size: item.fileSize,
};
}),
})
}
if (updateQueued) {
lastUpdate = Date.now();
await removeOutdatedDumps(appExportPath);
const dumpFiles = [];
for (const table of tables) {
db.prepare('run', `COPY (SELECT * FROM "${table.name}"${table.order ? ` ORDER BY "${table.order}"` : ``})
TO '/opt/exports/${table.name}.csv' WITH (FORMAT CSV, HEADER true);`);
const fileName = `${table.name}_${lastUpdate}.csv`;
const file = `${postgresExportPath}/${fileName}`;
await db.prepare('run', `COPY (SELECT * FROM "${table.name}"${table.order ? ` ORDER BY "${table.order}"` : ``})
TO '${file}' WITH (FORMAT CSV, HEADER true);`);
dumpFiles.push({
fileName,
tableName: table.name,
});
}
latestDumpFiles = [...dumpFiles];
}
}
}

View file

@ -38,6 +38,7 @@ export interface SBSConfig {
maximumPrefix?: string;
redis?: redis.ClientOpts;
postgres?: PoolConfig;
dumpDatabase?: DumpDatabase;
}
export interface WebhookConfig {
@ -61,4 +62,17 @@ export interface PostgresConfig {
createDbIfNotExists: boolean;
enableWalCheckpointNumber: boolean;
postgres: PoolConfig;
}
}
/** Settings of the database dump endpoint (the `dumpDatabase` entry of the config). */
export interface DumpDatabase {
/** When falsy, the dump endpoint answers with 404 "Database dump is disabled". */
enabled: boolean;
/** Minimum number of milliseconds that must pass before a new dump is queued. */
minTimeBetweenMs: number;
/** Directory the application scans to delete outdated dump files. */
appExportPath: string;
/** Directory used to build the target file path of each `COPY ... TO` statement. */
postgresExportPath: string;
/** Tables to export; each one is written to its own CSV file. */
tables: DumpDatabaseTable[];
}
/** One table included in the database dump. */
export interface DumpDatabaseTable {
/** Table name; used in the `SELECT * FROM` query and as the prefix of the CSV file name. */
name: string;
/** Optional column for the `ORDER BY` clause; no explicit ordering is applied when omitted. */
order?: string;
}