From 7b1915ec4813a4f026286ec554092b931d0eb14f Mon Sep 17 00:00:00 2001
From: Madeline <46743919+MaddyUnderStars@users.noreply.github.com>
Date: Sun, 22 Jan 2023 13:41:31 +1100
Subject: [PATCH] Rewrite client cacher (#962)

---
 assets/cacheMisses                |   0
 scripts/client.js                 | 184 ++++++++----------------------
 src/api/middlewares/TestClient.ts |  10 +-
 3 files changed, 52 insertions(+), 142 deletions(-)
 delete mode 100644 assets/cacheMisses

diff --git a/assets/cacheMisses b/assets/cacheMisses
deleted file mode 100644
index e69de29b..00000000
diff --git a/scripts/client.js b/scripts/client.js
index 1165d8f4..bc68547b 100644
--- a/scripts/client.js
+++ b/scripts/client.js
@@ -17,7 +17,7 @@
 */
 
 /*
-	This file downloads a ( mostly ) complete discord.com web client for testing,
+	This file downloads a complete discord.com web client for testing,
 	and performs some basic patching:
 	* Replaces all mentions of "Server" -> "Guild"
 	* Replaces "Discord" -> `INSTANCE_NAME` variable
@@ -25,15 +25,11 @@
 	* Prevents `localStorage` deletion ( for `plugins`/`preload-plugins` )
 	* Adds `fast-identify` support ( TODO: add documentation )
 
-	This script can only download javascript client files.
-	It cannot download images, sounds, video, etc.
-	For that, a `cacheMisses` file in `fosscord-server/assets` is used.
-	After running the server for a while, uncached assets will be appended to that file
-	and downloaded after the next run of this script.
-	TODO: Make this configurable easily.
 */
 
+/*eslint-env node*/
+
 require("dotenv/config");
 const path = require("path");
 const fetch = require("node-fetch");
@@ -51,7 +47,7 @@ const agent = (_parsedURL) =>
 
 const CACHE_PATH = path.join(__dirname, "..", "assets", "cache");
 const BASE_URL = "https://discord.com";
-const INSTANCE_NAME = "Fosscord";
+const INSTANCE_NAME = process.env.CLIENT_PATCH_INSTANCE_NAME ?? "Fosscord";
 const ONLY_CACHE_JS = process.env.ONLY_CACHE_JS ? true : false;
 
 // Manual for now
@@ -61,11 +57,12 @@ const INDEX_SCRIPTS = [
 	"f98a039261c37f892cbf", // 0?
 	"4470c87bb13810847db0", // ~4500.
 
-	// also fetch other assets from index, as they aren't cached
+	// also fetch other assets from index, as we don't have a way to dl old indexes
 	"40532.f4ff6c4a39fa78f07880.css",
 	"b21a783b953e52485dcb.worker.js",
 	"2bbea887c6d07e427a1d.worker.js",
 	"0ec5df6d78ff7a5cc7c8.worker.js",
+	"625ccb6efce655a7d928.worker.js",
 	"05422eb499ddf5616e44a52c4f1063ae.woff2",
 	"77f603cc7860fcb784e6ef9320a4a9c2.woff2",
 	"e689380400b1f2d2c6320a823a1ab079.svg",
@@ -79,7 +76,7 @@ const doPatch = (content) => {
 	content = content.replaceAll(/ Nitro/g, " Premium");
 	content = content.replaceAll(/\[Nitro\]/g, "[Premium]");
 	content = content.replaceAll(/\*Nitro\*/g, "*Premium*");
-	content = content.replaceAll(/\"Nitro \. /g, '"Premium. ');
+	content = content.replaceAll(/"Nitro \. /g, '"Premium. ');
 
 	//remove discord references
 	content = content.replaceAll(/ Discord /g, ` ${INSTANCE_NAME} `);
@@ -184,162 +181,77 @@ const doPatch = (content) => {
 
 	// disable qr code login
 	content = content.replaceAll(
-		/\w\?\(\d,\w\.jsx\)\(\w*\,{authTokenCallback:this\.handleAuthToken}\):null/g,
+		/\w\?\(\d,\w\.jsx\)\(\w*,{authTokenCallback:this\.handleAuthToken}\):null/g,
 		"null",
 	);
 
 	return content;
 };
 
-const processFile = async (name) => {
-	const url = `${BASE_URL}/assets/${name}${name.includes(".") ? "" : ".js"}`;
-	if (ONLY_CACHE_JS && !url.endsWith(".js")) return [];
+const print = (x, printover = true) => {
+	var repeat = process.stdout.columns - x.length;
+	process.stdout.write(
+		`${x}${" ".repeat(Math.max(0, repeat))}${printover ? "\r" : "\n"}`,
+	);
+};
+
+const processFile = async (asset) => {
+	// The asset name may not include the file extension. Usually if it doesn't, it's js though.
+	asset = `${asset}${asset.includes(".") ? "" : ".js"}`;
+	if (ONLY_CACHE_JS && !asset.endsWith(".js")) return [];
+
+	const url = `${BASE_URL}/assets/${asset}`;
 
 	const res = await fetch(url, { agent });
 	if (res.status !== 200) {
+		print(`${res.status} on ${asset}`, false);
 		return [];
 	}
 
-	if (name.includes(".") && !name.includes(".js") && !name.includes(".css")) {
-		await fs.writeFile(path.join(CACHE_PATH, name), await res.buffer());
+	if (
+		asset.includes(".") &&
+		!asset.includes(".js") &&
+		!asset.includes(".css")
+	) {
+		await fs.writeFile(path.join(CACHE_PATH, asset), await res.buffer());
 		return [];
 	}
 
 	let text = await res.text();
-
 	text = doPatch(text);
 
-	await fs.writeFile(
-		path.join(CACHE_PATH, `${name}${name.includes(".") ? "" : ".js"}`),
-		text,
-	);
+	await fs.writeFile(path.join(CACHE_PATH, asset), text);
 
-	var additional = [];
-	additional.push(...new Set(text.match(/\"[A-Fa-f0-9]{20}\"/g)));
-	additional.push(
-		...[
-			...new Set(text.matchAll(/\.exports=.\..\+\"(.*?\..{0,5})\"/g)),
-		].map((x) => x[1]),
-	);
+	let ret = new Set([
+		...(text.match(/"[A-Fa-f0-9]{20}"/g) ?? []), // These are generally JS assets
+		...[...text.matchAll(/\.exports=.\..\+"(.*?\..{0,5})"/g)].map(
+			(x) => x[1],
+		), // anything that looks like e.exports="filename.ext"
+		...[...text.matchAll(/\/assets\/(.*?\.[a-z]{0,5})/g)].map((x) => x[1]), // commonly matches `background: url(/assets/blah.svg)`
+	]);
 
-	return additional.map((x) => x.replaceAll('"', ""));
-};
-
-const print = (x) => {
-	var repeat = process.stdout.columns - x.length;
-	process.stdout.write(`${x}${" ".repeat(Math.max(0, repeat))}\r`);
+	return [...ret].map((x) => x.replaceAll('"', ""));
 };
 
 (async () => {
-	const start = Date.now();
-
-	// console.log("Deleting previous cache");
-	// await fs.rm(CACHE_PATH, { recursive: true });
-	if (!existsSync(CACHE_PATH)) await fs.mkdir(CACHE_PATH);
-
-	const assets = [];
-
-	while (INDEX_SCRIPTS.length > 0) {
-		const asset = INDEX_SCRIPTS.shift();
-
-		print(`Scraping asset ${asset}. Remaining: ${INDEX_SCRIPTS.length}`);
-
-		const newAssets = await processFile(asset);
-		assets.push(...newAssets);
-	}
-
-	console.log();
-
-	const CACHE_MISSES = (
-		await fs.readFile(path.join(CACHE_PATH, "..", "cacheMisses"))
-	)
-		.toString()
-		.split("\r")
-		.join("")
-		.split("\n");
-	while (CACHE_MISSES.length > 0) {
-		const asset = CACHE_MISSES.shift();
-
-		print(
-			`Scraping cache misses ${asset}. Remaining: ${CACHE_MISSES.length}`,
-		);
-
-		if (existsSync(path.join(CACHE_PATH, `${asset}`))) {
-			continue;
-		}
-
-		const newAssets = await processFile(asset);
-		assets.push(...newAssets);
-	}
-
-	console.log();
-
-	var existing = await fs.readdir(CACHE_PATH);
-	while (existing.length > 0) {
-		var file = existing.shift();
-
-		print(`Patching existing ${file}. Remaining: ${existing.length}.`);
-
-		var text = await fs.readFile(path.join(CACHE_PATH, file));
-		if (file.includes(".js") || file.includes(".css")) {
-			text = doPatch(text.toString());
-			await fs.writeFile(path.join(CACHE_PATH, file), text.toString());
-
-			var additional = [];
-			additional.push(...new Set(text.match(/\"[A-Fa-f0-9]{20}\"/g)));
-			additional.push(
-				...[
-					...new Set(
-						text.matchAll(/\.exports=.\..\+\"(.*?\..{0,5})\"/g),
-					),
-				].map((x) => x[1]),
-			);
-			assets.push(additional.map((x) => x.replaceAll('"', "")));
-		}
-	}
-
-	console.log();
-
-	let rates = [];
-	let lastFinished = Date.now();
-	let previousFinish = Date.now();
+	if (!existsSync(CACHE_PATH))
+		await fs.mkdir(CACHE_PATH, { recursive: true });
 
+	// Use a set to remove dupes for us
+	const assets = new Set(INDEX_SCRIPTS);
 	let promises = [];
 
-	for (var i = 0; i < assets.length; i++) {
-		const asset = assets[i];
-
-		if (existsSync(path.join(CACHE_PATH, `${asset}.js`))) {
-			continue;
-		}
-
-		while (rates.length > 50) rates.shift();
-		const averageRate = rates.length
-			? rates.reduce((prev, curr) => prev + curr) / rates.length
-			: 1;
-		const finishTime = averageRate * (assets.length - i);
-
-		print(
-			`Caching asset ${asset}. ` +
-				`${i}/${assets.length - 1} = ${Math.floor(
-					(i / (assets.length - 1)) * 100,
-				)}% `,
-			// + `Finish at: ${new Date(
-			// 	Date.now() + finishTime,
-			// ).toLocaleTimeString()}`,
-		);
+	let index = 0;
+	for (let asset of assets) {
+		index += 1;
+		print(`Scraping asset ${asset}. Remaining: ${assets.size - index}`);
 
 		promises.push(processFile(asset));
-		// await processFile(asset);
-
-		if (promises.length > 100) {
+		if (promises.length > 100 || index == assets.size) {
 			const values = await Promise.all(promises);
-			assets.push(...values.flat());
 			promises = [];
-
-			lastFinished = Date.now();
-			rates.push(lastFinished - previousFinish);
-			previousFinish = lastFinished;
+			values.flat().forEach((x) => assets.add(x));
 		}
 	}
 
-	console.log(`\nDone`);
+	console.log("done");
 })();
diff --git a/src/api/middlewares/TestClient.ts b/src/api/middlewares/TestClient.ts
index 10d7e8f1..d229bec9 100644
--- a/src/api/middlewares/TestClient.ts
+++ b/src/api/middlewares/TestClient.ts
@@ -25,6 +25,8 @@ import { Config } from "@fosscord/util";
 
 const ASSET_FOLDER_PATH = path.join(__dirname, "..", "..", "..", "assets");
 
+let HAS_SHOWN_CACHE_WARNING = false;
+
 export default function TestClient(app: Application) {
 	app.use("/assets", express.static(path.join(ASSET_FOLDER_PATH, "public")));
 	app.use("/assets", express.static(path.join(ASSET_FOLDER_PATH, "cache")));
@@ -102,15 +104,11 @@ export default function TestClient(app: Application) {
 
 		assetCache.set(req.params.file, { buffer, response });
 
-		// TODO: I don't like this. Figure out a way to get client cacher to download *all* assets.
-		if (response.status == 200) {
+		if (response.status == 200 && !HAS_SHOWN_CACHE_WARNING) {
+			HAS_SHOWN_CACHE_WARNING = true;
 			console.warn(
 				`[TestClient] Cache miss for file ${req.params.file}! Use 'npm run generate:client' to cache and patch.`,
 			);
-			await fs.promises.appendFile(
-				path.join(ASSET_FOLDER_PATH, "cacheMisses"),
-				req.params.file + "\n",
-			);
 		}
 
 		return res.send(buffer);
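
A note on the rewritten download loop in scripts/client.js: it leans on the fact that a JavaScript Set behaves like a de-duplicating work queue. A for...of iterator over a Set visits values in insertion order and also visits values that are add()-ed while the iteration is still running, and duplicate add() calls are ignored. The sketch below is illustrative only and is not part of the patch; the names `queue` and `discovered` are invented stand-ins for the asset Set and the regex extraction done in processFile().

	// Minimal sketch of the Set-as-work-queue idea (illustrative names, not from the patch).
	const queue = new Set(["index.js"]);
	const discovered = {
		"index.js": ["chunk.js", "icon.svg"], // pretend these names were found inside index.js
		"chunk.js": ["icon.svg"],
	};

	for (const item of queue) {
		for (const next of discovered[item] ?? []) {
			queue.add(next); // already-seen names are ignored; new ones extend the iteration
		}
	}

	console.log([...queue]); // [ 'index.js', 'chunk.js', 'icon.svg' ]

Combined with flushing the promises array once it passes 100 entries, this keeps roughly 100 downloads in flight while newly discovered assets keep feeding the same Set.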