mirror of https://github.com/phiresky/sql.js-httpvfs.git synced 2024-11-08 20:22:44 +01:00

commit 19b199a3cb (parent 7716034c20)
Author: phiresky
Date:   2021-04-13 17:26:11 +02:00

    split server file

14 changed files with 3314 additions and 540 deletions

.gitignore
@@ -1,2 +1,3 @@
 /node_modules
 /dist
+/data

.vscode/settings.json (new file)
@@ -0,0 +1,2 @@
+{
+}

create_db.sh (new executable file)
@@ -0,0 +1,10 @@
+set -eu
+cd "$(dirname "$0")"
+rm -rf dist/data
+mkdir -p dist/data
+cat create_db.sql | sqlite3 -cmd '.echo on' dist/data/db.sqlite3
+bytes="$(stat --printf="%s" dist/data/db.sqlite3)"
+serverChunkSize=$((50 * 1024 * 1024))
+suffixLength=3
+split dist/data/db.sqlite3 --bytes=$serverChunkSize dist/data/db.sqlite3. --suffix-length=$suffixLength --numeric-suffixes
+echo '{"requestChunkSize": 4096, "databaseLengthBytes": '$bytes', "serverChunkSize": '$serverChunkSize', "urlPrefix": "db.sqlite3.", "suffixLength": '$suffixLength'}' > dist/data/config.json
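For orientation, here is a sketch of what the script leaves in dist/data/ — the byte counts below are hypothetical, for illustration only; the real values are computed by `stat` and the shell variables above. A 120,000,000-byte db.sqlite3 split at 50 MiB yields db.sqlite3.000 and .001 at 52,428,800 bytes each plus db.sqlite3.002 with the remaining 15,142,400 bytes, and config.json describes that layout so the client can map byte offsets to chunk URLs:

```ts
// Hypothetical dist/data/config.json for an assumed 120,000,000-byte database
// (values illustrative; create_db.sh computes the real ones at build time):
const exampleConfig = {
  requestChunkSize: 4096, // one HTTP Range request per 4 KiB SQLite page
  databaseLengthBytes: 120_000_000, // needed up front: HEAD on one chunk can't reveal the total
  serverChunkSize: 52_428_800, // 50 * 1024 * 1024, the `split --bytes` size
  urlPrefix: "db.sqlite3.", // chunk files are urlPrefix + zero-padded index
  suffixLength: 3, // width of split's --numeric-suffixes
};
```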

create_db.sql
@@ -6,7 +6,7 @@ attach database 'youtube-metadata-pg4096.sqlite3' as ytm;
 CREATE TABLE authors (id integer primary key autoincrement, name text not null unique);
-insert into authors (name) select distinct author from ytm.videoData;
+insert into authors (name) select author from ytm.videoData group by author having count(*) >= 3; -- authors with at least 3 vids in database
 create table videoData as select * from ytm.videoData order by author; -- important to sort here so it can be fetched quickly by author
@@ -17,6 +17,7 @@ CREATE TABLE "sponsorTimes" (
 	"startTime" REAL NOT NULL,
 	"endTime" REAL NOT NULL,
 	"votes" INTEGER NOT NULL,
+	"category" TEXT NOT NULL,
 	"shadowHidden" INTEGER NOT NULL,
 	"authorID" INTEGER);
@@ -24,7 +25,7 @@ CREATE INDEX "sponsorTimes_videoID" ON "sponsorTimes" ("videoID");
 CREATE INDEX sptauth on sponsorTimes(authorID);
 insert into sponsorTimes
-	select s.videoID, s.startTime, s.endTime, s.votes, s.shadowHidden, authors.id
+	select s.videoID, s.startTime, s.endTime, s.votes, s.category, s.shadowHidden, authors.id
 	from ytm.sponsorTimes s
 	join ytm.videoData v on v.videoID = s.videoID
 	join authors on authors.name = v.author

index.html
@@ -2,7 +2,7 @@
 <meta charset="utf-8">
-<title>Sponsorblock stat by uploader</title>
+<title>Sponsorship stat by uploader</title>
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
 <meta name="viewport" content="width=device-width, initial-scale=1">

package.json
@@ -3,17 +3,20 @@
     "dev": "webpack serve"
   },
   "dependencies": {
-    "@types/better-sqlite3": "^5.4.1",
+    "@types/debounce-promise": "^3.1.3",
     "@types/react": "^17.0.3",
     "@types/react-dom": "^17.0.3",
+    "@types/react-plotly.js": "^2.2.4",
     "@types/react-select": "^4.0.14",
     "@types/sql.js": "^1.4.2",
-    "better-sqlite3": "^7.1.4",
     "comlink": "^4.3.0",
+    "debounce-promise": "^3.1.2",
     "mobx": "^6.2.0",
     "mobx-react": "^7.1.0",
+    "plotly.js": "^1.58.4",
     "react": "^17.0.2",
     "react-dom": "^17.0.2",
+    "react-plotly.js": "^2.5.1",
     "react-select": "^4.3.0",
     "ts-loader": "^8.1.0",
     "ts-node": "^9.1.1",

sql.js/Makefile
@@ -21,8 +21,7 @@ CFLAGS = \
 	-O2 \
 	-DSQLITE_OMIT_LOAD_EXTENSION \
 	-DSQLITE_DISABLE_LFS \
-	-DSQLITE_ENABLE_FTS3 \
-	-DSQLITE_ENABLE_FTS3_PARENTHESIS \
+	-DSQLITE_ENABLE_FTS5 \
 	-DSQLITE_ENABLE_JSON1 \
 	-DSQLITE_THREADSAFE=0 \
 	-DSQLITE_ENABLE_NORMALIZE
@@ -54,8 +53,7 @@ EMFLAGS_WASM = \
 EMFLAGS_OPTIMIZED= \
 	-s INLINING_LIMIT=50 \
 	-O3 \
-	-flto \
-	--closure 1
+	-flto

 EMFLAGS_DEBUG = \
 	-s INLINING_LIMIT=10 \

sql.js/src/api.js
@@ -816,10 +816,10 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() {
         // (created by create_function call)
         this.functions = {};
     }

-    function UrlDatabase(url, chunksize = 4096) {
-        this.filename = url.replace("/", "_");
-        const {createLazyFile} = require('../../src/lazyFile');
-        this.lazyFile = createLazyFile(FS, "/", this.filename, url, true, true, chunksize);
+    function UrlDatabase(filename, lazyFile) {
+        this.filename = filename;
+        this.lazyFile = lazyFile;
         const ret = sqlite3_open(this.filename, apiTemp, 1, null);
         this.db = getValue(apiTemp, "i32");
         this.handleError(ret);
@@ -1231,9 +1231,9 @@ Module["onRuntimeInitialized"] = function onRuntimeInitialized() {
     // export Database to Module
     Module.Database = Database;
-    Module.VfsDatabase = VfsDatabase;
-    Module.UrlDatabase = UrlDatabase;
-    Module.FS = FS;
+    Module["VfsDatabase"] = VfsDatabase;
+    Module["UrlDatabase"] = UrlDatabase;
+    Module["FS"] = FS;
     VfsDatabase.prototype = Object.create(Database.prototype);
     UrlDatabase.prototype = Object.create(Database.prototype);
 };

(unnamed file: React UI component)
@@ -1,57 +1,228 @@
 import { observer, useLocalObservable } from "mobx-react";
 import * as React from "react";
 import { Database } from "sql.js";
-import { authorsSearch, createDbWorker, getForAuthor } from "./db";
-import { makeAutoObservable, makeObservable, observable } from "mobx";
-import AsyncSelect from 'react-select/async';
+import {
+  authorsSearch,
+  createDbWorker,
+  getForAuthor,
+  SponsorInfo,
+  SqliteWorker,
+  VideoMeta,
+} from "./db";
+import { action, makeAutoObservable, makeObservable, observable } from "mobx";
+import AsyncSelect from "react-select/async";
+import debounce from "debounce-promise";
+import Plot from "react-plotly.js";
+
+function formatDuration(sec_num: number) {
+  const hours = Math.floor(sec_num / 3600);
+  const minutes = Math.floor((sec_num - hours * 3600) / 60);
+  const seconds = Math.round(sec_num - hours * 3600 - minutes * 60);
+  return (
+    String(hours).padStart(2, "0") +
+    ":" +
+    String(minutes).padStart(2, "0") +
+    ":" +
+    String(seconds).padStart(2, "0")
+  );
+}
+
+const SponsorPlot: React.FC<{
+  data: SponsorInfo[];
+  onHover: (m: SponsorInfo) => void;
+}> = observer((p) => {
+  console.log("RERENDERING PLOT");
+  return (
+    <Plot
+      style={{ width: "100%", maxWidth: "1200px", margin: "0 auto" }}
+      onClick={(e) => {
+        console.log("hover", e);
+        const element = p.data[e.points[0].pointIndex];
+        if (element) p.onHover(element);
+      }}
+      data={[
+        {
+          x: p.data.map((e) => new Date(e.meta.published * 1000)),
+          y: p.data.map((e) => e.percentSponsor / 100),
+          text: p.data.map(
+            (e) =>
+              `<b>${e.meta.title}</b><br>
+              published ${new Date(
+                e.meta.published * 1000
+              ).toLocaleDateString()}<br>
+              Length: ${formatDuration(e.meta.lengthSeconds)}<br>
+              Sponsor duration: ${formatDuration(
+                e.durationSponsor
+              )} (<b>${e.percentSponsor.toFixed(0)}%</b>)`
+          ),
+          hovertemplate: "%{text}",
+          type: "scatter",
+          mode: "markers",
+        },
+      ]}
+      layout={{
+        autosize: true,
+        yaxis: { tickformat: ",.0%", title: "Part that is Sponsorship" },
+        xaxis: { title: "Upload date" },
+      }}
+    />
+  );
+});
+
+type SqliteStats = {
+  filename: string;
+  totalBytes: number;
+  totalFetchedBytes: number;
+  totalRequests: number;
+};
+
+function formatBytes(b: number) {
+  if (b > 1e6) {
+    return (b / 1e6).toFixed(2) + "MB";
+  }
+  if (b > 1e3) {
+    return (b / 1e3).toFixed(2) + "KB";
+  }
+  return b + "B";
+}
+
+const SqliteStats: React.FC<{ stats: SqliteStats }> = observer(({ stats }) => {
+  return (
+    <>
+      Sqlite stats: fetched {formatBytes(stats.totalFetchedBytes)} in{" "}
+      {stats.totalRequests} requests (DB size: {formatBytes(stats.totalBytes)})
+    </>
+  );
+});
+
+const VideoMetaDisplay: React.FC<{ video: SponsorInfo }> = observer(
+  ({ video }) => {
+    return (
+      <div>
+        <a href={`https://youtube.com/watch?v=${video.meta.videoID}`}>
+          <img
+            src={video.meta.maxresdefault_thumbnail}
+            width={200}
+            style={{ float: "left" }}
+          ></img>
+          <h4>{video.meta.title}</h4>
+        </a>
+        {video.meta.viewCount} views
+        <br />
+        published {new Date(video.meta.published * 1000).toLocaleDateString()}
+        <br />
+        Length: {formatDuration(video.meta.lengthSeconds)}
+        <br />
+        Sponsor duration: {formatDuration(video.durationSponsor)} (
+        <b>{video.percentSponsor.toFixed(0)}%</b>)
+      </div>
+    );
+  }
+);
+
 @observer
 export class UI extends React.Component {
+  worker: SqliteWorker | null = null;
   db: Database | null = null;
-  @observable authorSearch = "";
   @observable
-  suggestions = {
-    error: "type more",
-    results: [] as string[],
+  data:
+    | { state: "noinput" }
+    | { state: "loading"; author: string }
+    | { state: "loaded"; author: string; segs: SponsorInfo[] } = {
+    state: "noinput",
   };
   @observable
-  data: { author: string; segs: any[] } | null = null;
+  stats: SqliteStats | null = null;
+  @observable
+  focussedVideo: SponsorInfo | null = null;
+  @observable searchInput: string = "";
   constructor(p: {}) {
     super(p);
     this.init();
     makeObservable(this);
   }
-  async init() {
-    this.db = await createDbWorker();
+  interval: number = 0;
+  componentDidMount() {
+    this.interval = setInterval(async () => {
+      this.stats = (await this.worker?.getStats()) || null;
+    }, 1000);
   }
-  async setAuthor(t: string) {
-    this.authorSearch = t;
+  componentWillUnmount() {
+    clearInterval(this.interval);
+  }
+  async init() {
+    console.log("INIT");
+    const res = await createDbWorker();
+    this.db = res.db;
+    this.worker = res.worker;
+    const initialAuthor = new URLSearchParams(location.search).get("uploader");
+    if (initialAuthor) this.setAuthor(initialAuthor);
+  }
+  async setAuthor(search: string) {
+    this.searchInput = search;
+    this.focussedVideo = null;
     if (this.db) {
-      const search = this.authorSearch;
       this.data = {
+        state: "loading",
+        author: search,
+      };
+      this.data = {
+        state: "loaded",
         author: search,
         segs: await getForAuthor(this.db, search),
       };
       console.log("data", this.data);
+      {
+        const searchParams = new URLSearchParams(location.search);
+        searchParams.set("uploader", search);
+        window.history.replaceState(null, document.title, "?" + searchParams);
+      }
     }
   }
   async authorsSearch(search: string) {
-    return (await authorsSearch(this.db!, search))
+    if (this.db) {
+      return await authorsSearch(this.db, search);
+    }
+    return [];
   }
+  authorsSearchDebounce = debounce(this.authorsSearch.bind(this), 250, {
+    leading: true,
+  });
+  @action
+  setFocussed = (e: SponsorInfo) => (this.focussedVideo = e);
   render() {
     return (
       <div>
         <div>
-          Search for author:{" "}
-          <AsyncSelect<{name: string}> cacheOptions defaultOptions loadOptions={this.authorsSearch.bind(this)} />
+          Search for YouTuber:{" "}
+          <AsyncSelect<{ name: string }>
+            cacheOptions
+            inputValue={this.searchInput}
+            onInputChange={(e) => (this.searchInput = e)}
+            loadOptions={this.authorsSearchDebounce}
+            getOptionLabel={(e) => e.name}
+            getOptionValue={(e) => e.name}
+            onChange={(e) => this.setAuthor(e.name)}
+          />
         </div>
-        {this.data && (
+        {this.data.state === "noinput" ? (
+          <></>
+        ) : this.data.state === "loading" ? (
+          <div>Loading videos for author "{this.data.author}"</div>
+        ) : (
           <div>
-            Found {this.data.segs.length} videos for author "{this.data.author}"
+            <p>
+              Found {this.data.segs.length} videos with sponsorships from{" "}
+              {this.data.author}
+            </p>{" "}
+            <SponsorPlot data={this.data.segs} onHover={this.setFocussed} />
           </div>
         )}
+        {this.focussedVideo && <VideoMetaDisplay video={this.focussedVideo} />}
+        <footer style={{ marginTop: "5em", color: "gray" }}>
+          {this.stats ? <SqliteStats stats={this.stats} /> : ""}{" "}
+        </footer>
       </div>
     );
   }

src/db.ts
@@ -1,7 +1,5 @@
-import initSqlJs from "../sql.js/dist/sql-wasm-debug.js";
-import wasmfile from "../sql.js/dist/sql-wasm-debug.wasm";
 import * as Comlink from "comlink";
-import SqliteWorker from "./sqlite.worker";
+import SqliteWorker, { SplitFileConfig } from "./sqlite.worker";
 import { chooseSegments, DBSegment } from "./util";
 import { SqliteMod } from "./sqlite.worker.js";
@@ -19,11 +17,17 @@ Comlink.transferHandlers.set("WORKERSQLPROXIES", {
     return Comlink.wrap(port);
   },
 });
+export type SqliteWorker = Comlink.Remote<SqliteMod>;
 export async function createDbWorker() {
   const sqlite = Comlink.wrap<SqliteMod>(new SqliteWorker());
   const chunkSize = 4096;
-  const db = await sqlite.UrlDatabase("db.sqlite3", chunkSize);
+  const configUrl = new URL("dist/data/config.json", location.href);
+  const config: SplitFileConfig = await fetch(configUrl.toString()).then(e => e.json());
+  const db = await sqlite.SplitFileHttpDatabase({
+    ...config,
+    urlPrefix: new URL(config.urlPrefix, configUrl).toString(),
+  });
   const pageSizeResp = await db.exec("pragma page_size");
   const pageSize = pageSizeResp[0].values[0][0];
   if (pageSize !== chunkSize)
@@ -31,8 +35,7 @@ export async function createDbWorker() {
       `Chunk size does not match page size: pragma page_size = ${pageSize} but chunkSize = ${chunkSize}`
     );
-  return db;
+  return { worker: sqlite, db };
 }

 async function testLoop(metaDb: Database) {
@@ -66,17 +69,29 @@ function toObjects<T>(res: QueryExecResult[]): T[] {
     return o as T;
   });
 }
-type VideoMeta = {
+export type VideoMeta = {
   videoID: string;
-  published: number;
-  lengthSeconds: number;
   title: string;
+  maxresdefault_thumbnail: string;
+  published: number;
+  publishedText: string;
+  viewCount: number;
+  likeCount: number;
+  author: string;
+  authorURL: string;
+  channelThumbnail: string;
+  lengthSeconds: number;
+  category: string;
 };
 export async function authorsSearch(db: Database, author: string) {
   try {
-    const query_inner = author.split(" ").map(n => n.replace(/"/g, "")).map(e => `"${e}"`).join(" ");
+    const query_inner = author
+      .split(" ")
+      .map((n) => n.replace(/"/g, ""))
+      .map((e) => `"${e}"*`)
+      .join(" ");
     const query = `NEAR(${query_inner})`;
-    const sql_query = `select name from authors_search where name match ? order by rank limit 20`;
+    const sql_query = `select name from authors_search where name match ? limit 20`;
     console.log("executing search query", query, sql_query);
     const ret = toObjects<{ name: string }>(await db.exec(sql_query, [query]));
     return ret;
@@ -85,7 +100,15 @@ export async function authorsSearch(db: Database, author: string) {
     throw e;
   }
 }
-export async function getForAuthor(db: Database, author: string) {
+export type SponsorInfo = {
+  meta: VideoMeta;
+  durationSponsor: number;
+  percentSponsor: number;
+};
+export async function getForAuthor(
+  db: Database,
+  author: string
+): Promise<SponsorInfo[]> {
   /*await db.exec(`select s.rowid from sponsorTimes s
     join videoData v on s.videoid = v.videoid
@@ -93,7 +116,7 @@ export async function getForAuthor(db: Database, author: string) {
   const videos = toObjects<VideoMeta>(
     await db.exec(
-      "select videoData.videoID, published, lengthSeconds, title from videoData where author = ?",
+      "select * from videoData where author = ? order by published asc",
       [author]
     )
   );
@@ -102,7 +125,7 @@ export async function getForAuthor(db: Database, author: string) {
     await db.exec(
       // "select videoData.videoID, published, lengthSeconds, title from videoData join sponsorTimes on sponsorTimes.videoID = videoData.videoID where author = ? order by sponsorTimes.rowid asc",
       // [author]
-      "select * from sponsorTimes where authorID = (select id from authors where name = ?) order by videoID, startTime",
+      "select * from sponsorTimes where authorID = (select id from authors where name = ?) and not shadowHidden and category = 'sponsor' order by videoID, startTime",
       [author]
     )
   ); // select sponsorTimes.rowid, sponsorTimes.videoID from videoData join sponsorTimes on sponsorTimes.videoID = videoData.videoID where author = 'Adam Ragusea';
@@ -130,17 +153,14 @@ export async function getForAuthor(db: Database, author: string) {
     const segments = chooseSegments(sponsorTimes.filter((s) => s.votes > -1));
     const duration = segments
       .map((m) => m.endTime - m.startTime)
-      .reduce((a, b) => a + b);
+      .reduce((a, b) => a + b, 0);
     const total = video.meta.lengthSeconds;
-    const percent = (duration / total) * 100;
+    const percentSponsor = (duration / total) * 100;
     out.push({
+      meta: video.meta,
       durationSponsor: duration,
-      durationTotal: total,
-      percent,
-      videoTitle: video.meta.title,
-      videoID: video.meta.videoID,
-      published: new Date(video.meta.published * 1000)
-    })
+      percentSponsor,
+    });
   }
   return out;
 }
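Since createDbWorker now returns both handles, here is a minimal caller sketch mirroring what UI.init above does. The `videoData` count query is an assumed example, not from this commit:

```ts
import { createDbWorker } from "./db";

async function main() {
  // createDbWorker now returns both the Comlink worker proxy and the db handle
  const { worker, db } = await createDbWorker();
  // any SQL runs inside the web worker; pages are fetched lazily over HTTP
  const res = await db.exec("select count(*) from videoData");
  console.log("videos in db:", res[0].values[0][0]);
  // the worker proxy also exposes the fetch statistics shown in the UI footer
  console.log(await worker.getStats());
}
```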

src/lazyFile.ts
@@ -4,6 +4,18 @@
 // XHR, which is not possible in browsers except in a web worker! Use preloading,
 // either --preload-file in emcc or FS.createPreloadedFile
+export type RangeMapper = (
+  fromByte: number,
+  toByte: number
+) => { url: string; fromByte: number; toByte: number };
+
+export type LazyFileConfig = {
+  rangeMapper: RangeMapper;
+  /** must be known beforehand if there's multiple server chunks (i.e. rangeMapper returns different urls) */
+  fileLength?: number;
+  requestChunkSize: number;
+};
+
 // Lazy chunked Uint8Array (implements get and length from Uint8Array). Actual getting is abstracted away for eventual reuse.
 class LazyUint8Array {
   lengthKnown = false;
@@ -14,7 +26,17 @@ class LazyUint8Array {
   lastEnd = 0;
   speed = 1;
-  constructor(private _chunkSize: number, private url: string) {}
+  _chunkSize: number;
+  rangeMapper: RangeMapper;
+
+  constructor(config: LazyFileConfig) {
+    this._chunkSize = config.requestChunkSize;
+    this.rangeMapper = config.rangeMapper;
+    if (config.fileLength) {
+      this._length = config.fileLength;
+      this.lengthKnown = true;
+    }
+  }
   get(idx: number) {
     if (idx > this.length - 1 || idx < 0) {
       return undefined;
@@ -33,14 +55,24 @@ class LazyUint8Array {
       this.speed = 1;
     }
     const chunksToFetch = this.speed;
-    let end = (chunkNum + chunksToFetch) * this.chunkSize - 1; // including this byte
-    end = Math.min(end, this.length - 1); // if datalength-1 is selected, this is the last block
-    this.lastEnd = end;
-    const buf = this.doXHR(start, end);
+    let endByte = (chunkNum + chunksToFetch) * this.chunkSize - 1; // including this byte
+    endByte = Math.min(endByte, this.length - 1); // if datalength-1 is selected, this is the last block
+    this.lastEnd = endByte;
+    const buf = this.doXHR(start, endByte);
     for (let i = 0; i < chunksToFetch; i++) {
       const curChunk = chunkNum + i;
-      this.chunks[curChunk] = new Uint8Array(buf, i * this.chunkSize, this.chunkSize);
+      if (i * this.chunkSize >= buf.byteLength) break; // past end of file
+      const curSize =
+        (i + 1) * this.chunkSize > buf.byteLength
+          ? buf.byteLength - i * this.chunkSize
+          : this.chunkSize;
+      // console.log("constructing chunk", buf.byteLength, i * this.chunkSize, curSize);
+      this.chunks[curChunk] = new Uint8Array(
+        buf,
+        i * this.chunkSize,
+        curSize
+      );
     }
   }
   if (typeof this.chunks[chunkNum] === "undefined")
@@ -50,10 +82,11 @@ class LazyUint8Array {
   cacheLength() {
     // Find length
     var xhr = new XMLHttpRequest();
-    xhr.open("HEAD", this.url, false);
+    const url = this.rangeMapper(0, 0).url;
+    xhr.open("HEAD", url, false);
     xhr.send(null);
     if (!((xhr.status >= 200 && xhr.status < 300) || xhr.status === 304))
-      throw new Error("Couldn't load " + this.url + ". Status: " + xhr.status);
+      throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
     var datalength = Number(xhr.getResponseHeader("Content-length"));
     var header;
@@ -82,22 +115,32 @@ class LazyUint8Array {
     }
     return this._chunkSize!;
   }
-  private doXHR(from: number, to: number) {
-    console.log(`- [xhr of size ${(to + 1 - from) / 1024} KiB]`);
-    this.totalFetchedBytes += to - from;
-    this.totalRequests++;
-    if (from > to)
-      throw new Error(
-        "invalid range (" + from + ", " + to + ") or no bytes requested!"
-      );
-    if (to > this.length - 1)
+  private doXHR(absoluteFrom: number, absoluteTo: number) {
+    console.log(
+      `- [xhr of size ${(absoluteTo + 1 - absoluteFrom) / 1024} KiB]`
+    );
+    this.totalFetchedBytes += absoluteTo - absoluteFrom;
+    this.totalRequests++;
+    if (absoluteFrom > absoluteTo)
+      throw new Error(
+        "invalid range (" +
+          absoluteFrom +
+          ", " +
+          absoluteTo +
+          ") or no bytes requested!"
+      );
+    if (absoluteTo > this.length - 1)
       throw new Error(
         "only " + this.length + " bytes available! programmer error!"
       );
+    const { fromByte: from, toByte: to, url } = this.rangeMapper(
+      absoluteFrom,
+      absoluteTo
+    );
     // TODO: Use mozResponseArrayBuffer, responseStream, etc. if available.
     var xhr = new XMLHttpRequest();
-    xhr.open("GET", this.url, false);
+    xhr.open("GET", url, false);
     if (this.length !== this.chunkSize)
       xhr.setRequestHeader("Range", "bytes=" + from + "-" + to);
@@ -109,7 +152,7 @@ class LazyUint8Array {
     xhr.send(null);
     if (!((xhr.status >= 200 && xhr.status < 300) || xhr.status === 304))
-      throw new Error("Couldn't load " + this.url + ". Status: " + xhr.status);
+      throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
     if (xhr.response !== undefined) {
       return xhr.response as ArrayBuffer;
     } else {
@@ -122,24 +165,15 @@ export function createLazyFile(
   FS: any,
   parent: string,
   name: string,
-  url: string,
   canRead: boolean,
   canWrite: boolean,
-  chunkSize: number = 4096
+  lazyFileConfig: LazyFileConfig
 ) {
-  var lazyArray = new LazyUint8Array(chunkSize, url);
+  var lazyArray = new LazyUint8Array(lazyFileConfig);
   var properties = { isDevice: false, contents: lazyArray };
   var node = FS.createFile(parent, name, properties, canRead, canWrite);
-  // This is a total hack, but I want to get this lazy file code out of the
-  // core of MEMFS. If we want to keep this lazy file concept I feel it should
-  // be its own thin LAZYFS proxying calls to MEMFS.
-  if (properties.contents) {
-    node.contents = properties.contents;
-  } else if (properties.url) {
-    node.contents = null;
-    node.url = properties.url;
-  }
+  node.contents = lazyArray;
   // Add a function that defers querying the file size until it is asked the first time.
   Object.defineProperties(node, {
     usedBytes: {
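The RangeMapper indirection introduced here is what decouples the lazy file from any particular URL layout. A minimal sketch of the single-URL (unsplit) case, with a placeholder URL; the split-file mapper is in sqlite.worker.ts below:

```ts
import { LazyFileConfig, RangeMapper } from "./lazyFile";

// Identity mapper: every absolute byte range maps onto one URL unchanged.
const singleFileMapper: RangeMapper = (fromByte, toByte) => ({
  url: "https://example.com/db.sqlite3", // placeholder URL, not from this repo
  fromByte,
  toByte,
});

const config: LazyFileConfig = {
  rangeMapper: singleFileMapper,
  requestChunkSize: 4096, // should equal the database's page_size
  // fileLength omitted: with a single URL it can be discovered via a HEAD request
};
```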

src/sqlite.worker.ts
@@ -1,13 +1,14 @@
 import * as Comlink from "comlink";
 import wasmfile from "../sql.js/dist/sql-wasm-debug.wasm";
 import initSqlJs from "../sql.js/dist/sql-wasm-debug.js";
-type Database = import("sql.js").Database;
+import { createLazyFile, RangeMapper } from "./lazyFile";
+import { getSyntheticTrailingComments } from "typescript";
+import { Database } from "sql.js";
 // https://gist.github.com/frankier/4bbc85f65ad3311ca5134fbc744db711
 function initTransferHandlers(sql: typeof import("sql.js")) {
   Comlink.transferHandlers.set("WORKERSQLPROXIES", {
-    canHandle: (obj) => {
+    canHandle: (obj): obj is unknown => {
       let isDB = obj instanceof sql.Database;
       let hasDB = obj.db && obj.db instanceof sql.Database; // prepared statements
       return isDB || hasDB;
@@ -24,15 +25,6 @@ function initTransferHandlers(sql: typeof import("sql.js")) {
   });
 }
-function stats(db: Database) {
-  console.log(
-    db.filename,
-    "total bytes fetched:",
-    db.lazyFile.contents.totalFetchedBytes,
-    "total requests:",
-    db.lazyFile.contents.totalRequests
-  );
-}
 async function init() {
   const sql = await initSqlJs({
     locateFile: (_file: string) => wasmfile,
@@ -41,13 +33,49 @@ async function init() {
   return sql;
 }
 const sqljs = init();
+
+export type SplitFileConfig = {
+  urlPrefix: string;
+  serverChunkSize: number;
+  databaseLengthBytes: number;
+  requestChunkSize: number;
+};
 const mod = {
-  async UrlDatabase(url: string, chunkSize: number): Promise<Database> {
+  db: null as null | Database,
+  async SplitFileHttpDatabase(p: SplitFileConfig): Promise<Database> {
     const sql = await sqljs;
-    const db = new sql.UrlDatabase(url, chunkSize);
-    setInterval(() => stats(db), 10000);
-    return db;
+    console.log("constructing url database");
+    const rangeMapper: RangeMapper = (from: number, to: number) => {
+      const serverChunkId = (from / p.serverChunkSize) | 0;
+      const serverFrom = from % p.serverChunkSize;
+      const serverTo = serverFrom + (to - from);
+      return {
+        url: p.urlPrefix + String(serverChunkId).padStart(3, "0"),
+        fromByte: serverFrom,
+        toByte: serverTo,
+      };
+    };
+    const filename = p.urlPrefix.replace(/\//g, "_");
+    console.log("filename", filename);
+    const lazyFile = createLazyFile(sql.FS, "/", filename, true, true, {
+      rangeMapper,
+      requestChunkSize: p.requestChunkSize,
+      fileLength: p.databaseLengthBytes,
+    });
+    this.db = new sql.UrlDatabase(filename, lazyFile);
+    return this.db!;
+  },
+  async getStats() {
+    const db = this.db;
+    if (!db) return null;
+    return {
+      filename: db.filename,
+      totalBytes: db.lazyFile.contents.length,
+      totalFetchedBytes: db.lazyFile.contents.totalFetchedBytes,
+      totalRequests: db.lazyFile.contents.totalRequests,
+    };
   },
 };
 export type SqliteMod = typeof mod;
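To make the rangeMapper arithmetic above concrete, a worked example using the chunk size that create_db.sh writes (the byte offsets are assumed for illustration):

```ts
const serverChunkSize = 52_428_800; // 50 MiB, as emitted by create_db.sh
const urlPrefix = "db.sqlite3.";
const [from, to] = [52_500_000, 52_504_095]; // one 4 KiB page, absolute offsets

const serverChunkId = (from / serverChunkSize) | 0; // = 1
const serverFrom = from % serverChunkSize; // = 71_200
const serverTo = serverFrom + (to - from); // = 75_295
console.log(urlPrefix + String(serverChunkId).padStart(3, "0")); // "db.sqlite3.001"
console.log(`Range: bytes=${serverFrom}-${serverTo}`);
```

Note that the mapper assumes one request stays within a single server chunk; a page never straddles a chunk boundary because serverChunkSize (52,428,800) is an exact multiple of requestChunkSize (4096).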

webpack.config.js
@@ -7,6 +7,7 @@ const ts = {
 module.exports = {
   entry: "./src",
   mode: "development",
+  devtool: "source-map",
   module: {
     rules: [
       {
@@ -34,6 +35,10 @@ module.exports = {
     path: path.resolve(__dirname, "dist"),
   },
   stats: {
-    children: true
-  }
+    children: true,
+  },
+  devServer: {
+    hot: false,
+    liveReload: false,
+  },
 };

yarn.lock
(diff suppressed because it is too large)