1
0
mirror of https://github.com/c9fe/22120.git synced 2024-11-10 04:52:43 +01:00

"Up deps"

This commit is contained in:
Cris Stringfellow 2021-12-17 15:43:37 +08:00
parent 143dd9e77c
commit bf011f0141
3 changed files with 83 additions and 61 deletions

View File

@ -11,8 +11,9 @@
import fs from 'fs';
// search related
import FlexSearch from 'flexsearch';
import { createIndex as NDX, addDocumentToIndex as ndx } from 'ndx';
import { query as NDXQuery } from 'ndx-query';
//import { createIndex as NDX, addDocumentToIndex as ndx } from 'ndx';
//import { query as NDXQuery } from 'ndx-query';
import { DocumentIndex } from 'ndx';
import Nat from 'natural';
import args from './args.js';
@ -623,13 +624,12 @@ export default Archivist;
State.Cache = new Map(JSON.parse(Fs.readFileSync(cacheFile)));
State.Index = new Map(JSON.parse(Fs.readFileSync(indexFile)));
console.log(Flex);
Fs.readdirSync(ftsDir, {withFileTypes:true}).forEach(dirEnt => {
if ( dirEnt.isFile() ) {
const content = Fs.readFileSync(Path.resolve(ftsDir, dirEnt.name)).toString();
const result = Flex.import(dirEnt.name, JSON.parse(content));
console.log('Imported', dirEnt.name, result);
console.log(Flex);
DEBUG && console.log('Imported', dirEnt.name, result);
DEBUG && console.log(Flex);
}
});
@ -774,55 +774,67 @@ export default Archivist;
}
function NDXIndex(fields) {
// source:
// adapted from:
// https://github.com/ndx-search/docs/blob/94530cbff6ae8ea66c54bba4c97bdd972518b8b4/README.md#creating-a-simple-indexer-with-a-search-function
// Old code (from newer, in my opinion, worse, version)
/*
// source:
// adapted from:
// https://github.com/ndx-search/docs/blob/94530cbff6ae8ea66c54bba4c97bdd972518b8b4/README.md#creating-a-simple-indexer-with-a-search-function
if ( ! new.target ) { throw `NDXIndex must be called with 'new'`; }
if ( ! new.target ) { throw `NDXIndex must be called with 'new'`; }
// `createIndex()` creates an index data structure.
// First argument specifies how many different fields we want to index.
const index = NDX(fields.length);
// `fieldAccessors` is an array with functions that are used to retrieve data from different fields.
const fieldAccessors = fields.map(f => doc => doc[f.name]);
// `fieldBoostFactors` is an array of boost factors for each field, in this example all fields will have
// identical factors.
const fieldBoostFactors = fields.map(() => 1);
return {
index,
// `add()` function will add documents to the index.
add: doc => ndx(
index,
fieldAccessors,
// Tokenizer is a function that breaks text into words, phrases, symbols, or other meaningful elements
// called tokens.
// Lodash function `words()` splits string into an array of its words, see https://lodash.com/docs/#words for
// details.
words,
// Filter is a function that processes tokens and returns terms, terms are used in Inverted Index to
// index documents.
termFilter,
// Document key, it can be a unique document id or a reference to a document if you want to store all documents
// in memory.
doc.url,
// Document.
doc,
),
// `search()` function will be used to perform queries.
search: q => NDXQuery(
index,
fieldBoostFactors,
// BM25 ranking function constants:
1.2, // BM25 k1 constant, controls non-linear term frequency normalization (saturation).
0.75, // BM25 b constant, controls to what degree document length normalizes tf values.
words,
termFilter,
// Set of removed documents, in this example we don't want to support removing documents from the index,
// so we can ignore it by specifying this set as `undefined` value.
undefined,
q,
),
};
*/
// Even older code (from older but, to me, much better, version: 0.4.1)
const index = new DocumentIndex();
fields.forEach(name => index.addField(name));
// `createIndex()` creates an index data structure.
// First argument specifies how many different fields we want to index.
const index = NDX(fields.length);
// `fieldAccessors` is an array with functions that are used to retrieve data from different fields.
const fieldAccessors = fields.map(f => doc => doc[f.name]);
// `fieldBoostFactors` is an array of boost factors for each field, in this example all fields will have
// identical factors.
const fieldBoostFactors = fields.map(() => 1);
return {
index,
// `add()` function will add documents to the index.
add: doc => ndx(
index,
fieldAccessors,
// Tokenizer is a function that breaks text into words, phrases, symbols, or other meaningful elements
// called tokens.
// Lodash function `words()` splits string into an array of its words, see https://lodash.com/docs/#words for
// details.
words,
// Filter is a function that processes tokens and returns terms, terms are used in Inverted Index to
// index documents.
termFilter,
// Document key, it can be a unique document id or a reference to a document if you want to store all documents
// in memory.
doc.url,
// Document.
doc,
),
// `search()` function will be used to perform queries.
search: q => NDXQuery(
index,
fieldBoostFactors,
// BM25 ranking function constants:
1.2, // BM25 k1 constant, controls non-linear term frequency normalization (saturation).
0.75, // BM25 b constant, controls to what degree document length normalizes tf values.
words,
termFilter,
// Set of removed documents, in this example we don't want to support removing documents from the index,
// so we can ignore it by specifying this set as `undefined` value.
undefined,
q,
),
search: query => index.search(query),
add: doc => index.add(doc.url, doc)
};
}

26
package-lock.json generated
View File

@ -16,7 +16,7 @@
"flexsearch": "latest",
"hasha": "latest",
"natural": "^5.1.11",
"ndx": "^1.0.2",
"ndx": "^0.4.1",
"ndx-query": "^1.0.1",
"ndx-serializable": "^1.0.0",
"node-fetch": "latest",
@ -3987,9 +3987,9 @@
}
},
"node_modules/ndx": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-1.0.2.tgz",
"integrity": "sha512-/TbqqemJ80lGKRoRuXsz7VgA0erkIxilCUbkMfRL1h2VBGBLGvQnI+FdHvWDqJnUhgOP/T9+SYeWS84wbXGBFA=="
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-0.4.1.tgz",
"integrity": "sha512-iXpb4uQmgBxqiPtO1PojXxYoTJyC0TmeFE20VTkGYakiihnO84atCLiRZQmzYW3UqzFL8FaXPMGOJDm276tjUA=="
},
"node_modules/ndx-query": {
"version": "1.0.1",
@ -3999,6 +3999,11 @@
"ndx": "^1.0.2"
}
},
"node_modules/ndx-query/node_modules/ndx": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-1.0.2.tgz",
"integrity": "sha512-/TbqqemJ80lGKRoRuXsz7VgA0erkIxilCUbkMfRL1h2VBGBLGvQnI+FdHvWDqJnUhgOP/T9+SYeWS84wbXGBFA=="
},
"node_modules/ndx-serializable": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ndx-serializable/-/ndx-serializable-1.0.0.tgz",
@ -11007,9 +11012,9 @@
"dev": true
},
"ndx": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-1.0.2.tgz",
"integrity": "sha512-/TbqqemJ80lGKRoRuXsz7VgA0erkIxilCUbkMfRL1h2VBGBLGvQnI+FdHvWDqJnUhgOP/T9+SYeWS84wbXGBFA=="
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-0.4.1.tgz",
"integrity": "sha512-iXpb4uQmgBxqiPtO1PojXxYoTJyC0TmeFE20VTkGYakiihnO84atCLiRZQmzYW3UqzFL8FaXPMGOJDm276tjUA=="
},
"ndx-query": {
"version": "1.0.1",
@ -11017,6 +11022,13 @@
"integrity": "sha512-ybm/bt2WDwDzoUDXKrqW+oHKPV9qF9E8ICqZUWZDYgPvogMZ49eaXnCJ1jP9V+bkgR98EebS7ylE1DIjwqvl4g==",
"requires": {
"ndx": "^1.0.2"
},
"dependencies": {
"ndx": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/ndx/-/ndx-1.0.2.tgz",
"integrity": "sha512-/TbqqemJ80lGKRoRuXsz7VgA0erkIxilCUbkMfRL1h2VBGBLGvQnI+FdHvWDqJnUhgOP/T9+SYeWS84wbXGBFA=="
}
}
},
"ndx-serializable": {

View File

@ -39,9 +39,7 @@
"flexsearch": "latest",
"hasha": "latest",
"natural": "^5.1.11",
"ndx": "^1.0.2",
"ndx-query": "^1.0.1",
"ndx-serializable": "^1.0.0",
"ndx": "^0.4.1",
"node-fetch": "latest",
"ws": "latest"
},