site/node_modules/flexsearch/dist/module/index.js

784 lines
20 KiB
JavaScript
Raw Normal View History

2024-10-14 06:09:33 +00:00
/**!
* FlexSearch.js
* Author and Copyright: Thomas Wilkerling
* Licence: Apache-2.0
* Hosted by Nextapps GmbH
* https://github.com/nextapps-de/flexsearch
*/
import { IndexInterface } from "./type.js";
import { encode as default_encoder } from "./lang/latin/default.js";
import { create_object, create_object_array, concat, sort_by_length_down, is_array, is_string, is_object, parse_option } from "./common.js";
import { pipeline, init_stemmer_or_matcher, init_filter } from "./lang.js";
import { global_lang, global_charset } from "./global.js";
import apply_async from "./async.js";
import { intersect } from "./intersect.js";
import Cache, { searchCache } from "./cache.js";
import apply_preset from "./preset.js";
import { exportIndex, importIndex } from "./serialize.js";
/**
* @constructor
* @implements IndexInterface
* @param {Object=} options
* @param {Object=} _register
* @return {Index}
*/
function Index(options, _register) {
if (!(this instanceof Index)) {
return new Index(options);
}
let charset, lang, tmp;
if (options) {
options = apply_preset(options);
charset = options.charset;
lang = options.lang;
if (is_string(charset)) {
if (-1 === charset.indexOf(":")) {
charset += ":default";
}
charset = global_charset[charset];
}
if (is_string(lang)) {
lang = global_lang[lang];
}
} else {
options = {};
}
let resolution,
optimize,
context = options.context || {};
this.encode = options.encode || charset && charset.encode || default_encoder;
this.register = _register || create_object();
this.resolution = resolution = options.resolution || 9;
this.tokenize = tmp = charset && charset.tokenize || options.tokenize || "strict";
this.depth = "strict" === tmp && context.depth;
this.bidirectional = parse_option(context.bidirectional, /* append: */ /* skip update: */ /* skip_update: */!0);
this.optimize = optimize = parse_option(options.optimize, !0);
this.fastupdate = parse_option(options.fastupdate, !0);
this.minlength = options.minlength || 1;
this.boost = options.boost;
// when not using the memory strategy the score array should not pre-allocated to its full length
this.map = optimize ? create_object_array(resolution) : create_object();
this.resolution_ctx = resolution = context.resolution || 1;
this.ctx = optimize ? create_object_array(resolution) : create_object();
this.rtl = charset && charset.rtl || options.rtl;
this.matcher = (tmp = options.matcher || lang && lang.matcher) && init_stemmer_or_matcher(tmp, !1);
this.stemmer = (tmp = options.stemmer || lang && lang.stemmer) && init_stemmer_or_matcher(tmp, !0);
this.filter = (tmp = options.filter || lang && lang.filter) && init_filter(tmp);
this.cache = (tmp = options.cache) && new Cache(tmp);
}
export default Index;
//Index.prototype.pipeline = pipeline;
/**
* @param {!number|string} id
* @param {!string} content
*/
Index.prototype.append = function (id, content) {
return this.add(id, content, !0);
};
// TODO:
// string + number as text
// boolean, null, undefined as ?
/**
* @param {!number|string} id
* @param {!string} content
* @param {boolean=} _append
* @param {boolean=} _skip_update
*/
Index.prototype.add = function (id, content, _append, _skip_update) {
if (content && (id || 0 === id)) {
if (!_skip_update && !_append && this.register[id]) {
return this.update(id, content);
}
content = this.encode("" + content);
const length = content.length;
if (length) {
// check context dupes to skip all contextual redundancy along a document
const dupes_ctx = create_object(),
dupes = create_object(),
depth = this.depth,
resolution = this.resolution;
for (let i = 0; i < length; i++) {
let term = content[this.rtl ? length - 1 - i : i],
term_length = term.length;
// skip dupes will break the context chain
if (term && term_length >= this.minlength && (depth || !dupes[term])) {
let score = get_score(resolution, length, i),
token = "";
switch (this.tokenize) {
case "full":
if (2 < term_length) {
for (let x = 0; x < term_length; x++) {
for (let y = term_length; y > x; y--) {
if (y - x >= this.minlength) {
const partial_score = get_score(resolution, length, i, term_length, x);
token = term.substring(x, y);
this.push_index(dupes, token, partial_score, id, _append);
}
}
}
break;
}
// fallthrough to next case when term length < 3
case "reverse":
// skip last round (this token exist already in "forward")
if (1 < term_length) {
for (let x = term_length - 1; 0 < x; x--) {
token = term[x] + token;
if (token.length >= this.minlength) {
const partial_score = get_score(resolution, length, i, term_length, x);
this.push_index(dupes, token, partial_score, id, _append);
}
}
token = "";
}
// fallthrough to next case to apply forward also
case "forward":
if (1 < term_length) {
for (let x = 0; x < term_length; x++) {
token += term[x];
if (token.length >= this.minlength) {
this.push_index(dupes, token, score, id, _append);
}
}
break;
}
// fallthrough to next case when token has a length of 1
default:
// case "strict":
if (this.boost) {
score = Math.min(0 | score / this.boost(content, term, i), resolution - 1);
}
this.push_index(dupes, term, score, id, _append);
// context is just supported by tokenizer "strict"
if (depth) {
if (1 < length && i < length - 1) {
// check inner dupes to skip repeating words in the current context
const dupes_inner = create_object(),
resolution = this.resolution_ctx,
keyword = term,
size = Math.min(depth + 1, length - i);
dupes_inner[keyword] = 1;
for (let x = 1; x < size; x++) {
term = content[this.rtl ? length - 1 - i - x : i + x];
if (term && term.length >= this.minlength && !dupes_inner[term]) {
dupes_inner[term] = 1;
const context_score = get_score(resolution + (length / 2 > resolution ? 0 : 1), length, i, size - 1, x - 1),
swap = this.bidirectional && term > keyword;
this.push_index(dupes_ctx, swap ? keyword : term, context_score, id, _append, swap ? term : keyword);
}
}
}
}
}
}
}
this.fastupdate || (this.register[id] = 1);
}
}
return this;
};
/**
* @param {number} resolution
* @param {number} length
* @param {number} i
* @param {number=} term_length
* @param {number=} x
* @returns {number}
*/
function get_score(resolution, length, i, term_length, x) {
// console.log("resolution", resolution);
// console.log("length", length);
// console.log("term_length", term_length);
// console.log("i", i);
// console.log("x", x);
// console.log((resolution - 1) / (length + (term_length || 0)) * (i + (x || 0)) + 1);
// the first resolution slot is reserved for the best match,
// when a query matches the first word(s).
// also to stretch score to the whole range of resolution, the
// calculation is shift by one and cut the floating point.
// this needs the resolution "1" to be handled additionally.
// do not stretch the resolution more than the term length will
// improve performance and memory, also it improves scoring in
// most cases between a short document and a long document
return i && 1 < resolution ? length + (term_length || 0) <= resolution ? i + (x || 0) : 0 | (resolution - 1) / (length + (term_length || 0)) * (i + (x || 0)) + 1 : 0;
}
/**
* @private
* @param dupes
* @param value
* @param score
* @param id
* @param {boolean=} append
* @param {string=} keyword
*/
Index.prototype.push_index = function (dupes, value, score, id, append, keyword) {
let arr = keyword ? this.ctx : this.map;
if (!dupes[value] || keyword && !dupes[value][keyword]) {
if (this.optimize) {
arr = arr[score];
}
if (keyword) {
dupes = dupes[value] || (dupes[value] = create_object());
dupes[keyword] = 1;
arr = arr[keyword] || (arr[keyword] = create_object());
} else {
dupes[value] = 1;
}
arr = arr[value] || (arr[value] = []);
if (!this.optimize) {
arr = arr[score] || (arr[score] = []);
}
if (!append || !arr.includes(id)) {
arr[arr.length] = id;
// add a reference to the register for fast updates
if (this.fastupdate) {
const tmp = this.register[id] || (this.register[id] = []);
tmp[tmp.length] = arr;
}
}
}
};
/**
* @param {string|Object} query
* @param {number|Object=} limit
* @param {Object=} options
* @returns {Array<number|string>}
*/
Index.prototype.search = function (query, limit, options) {
if (!options) {
if (!limit && is_object(query)) {
options = /** @type {Object} */query;
query = options.query;
} else if (is_object(limit)) {
options = /** @type {Object} */limit;
}
}
let result = [],
length,
context,
suggest,
offset = 0;
if (options) {
query = options.query || query;
limit = options.limit;
offset = options.offset || 0;
context = options.context;
suggest = options.suggest;
}
if (query) {
query = /** @type {Array} */this.encode("" + query);
length = query.length;
// TODO: solve this in one single loop below
if (1 < length) {
const dupes = create_object(),
query_new = [];
for (let i = 0, count = 0, term; i < length; i++) {
term = query[i];
if (term && term.length >= this.minlength && !dupes[term]) {
// this fast path can just apply when not in memory-optimized mode
if (!this.optimize && !suggest && !this.map[term]) {
// fast path "not found"
return result;
} else {
query_new[count++] = term;
dupes[term] = 1;
}
}
}
query = query_new;
length = query.length;
}
}
if (!length) {
return result;
}
limit || (limit = 100);
let depth = this.depth && 1 < length && !1 !== context,
index = 0,
keyword;
if (depth) {
keyword = query[0];
index = 1;
} else {
if (1 < length) {
query.sort(sort_by_length_down);
}
}
for (let arr, term; index < length; index++) {
term = query[index];
// console.log(keyword);
// console.log(term);
// console.log("");
if (depth) {
arr = this.add_result(result, suggest, limit, offset, 2 === length, term, keyword);
// console.log(arr);
// console.log(result);
// when suggestion enabled just forward keyword if term was found
// as long as the result is empty forward the pointer also
if (!suggest || !1 !== arr || !result.length) {
keyword = term;
}
} else {
arr = this.add_result(result, suggest, limit, offset, 1 === length, term);
}
if (arr) {
return (/** @type {Array<number|string>} */arr
);
}
// apply suggestions on last loop or fallback
if (suggest && index == length - 1) {
let length = result.length;
if (!length) {
if (depth) {
// fallback to non-contextual search when no result was found
depth = 0;
index = -1;
continue;
}
return result;
} else if (1 === length) {
// fast path optimization
return single_result(result[0], limit, offset);
}
}
}
return intersect(result, limit, offset, suggest);
};
/**
* Returns an array when the result is done (to stop the process immediately),
* returns false when suggestions is enabled and no result was found,
* or returns nothing when a set was pushed successfully to the results
*
* @private
* @param {Array} result
* @param {Array} suggest
* @param {number} limit
* @param {number} offset
* @param {boolean} single_term
* @param {string} term
* @param {string=} keyword
* @return {Array<Array<string|number>>|boolean|undefined}
*/
Index.prototype.add_result = function (result, suggest, limit, offset, single_term, term, keyword) {
let word_arr = [],
arr = keyword ? this.ctx : this.map;
if (!this.optimize) {
arr = get_array(arr, term, keyword, this.bidirectional);
}
if (arr) {
let count = 0;
const arr_len = Math.min(arr.length, keyword ? this.resolution_ctx : this.resolution);
// relevance:
for (let x = 0, size = 0, tmp, len; x < arr_len; x++) {
tmp = arr[x];
if (tmp) {
if (this.optimize) {
tmp = get_array(tmp, term, keyword, this.bidirectional);
}
if (offset) {
if (tmp && single_term) {
len = tmp.length;
if (len <= offset) {
offset -= len;
tmp = null;
} else {
tmp = tmp.slice(offset);
offset = 0;
}
}
}
if (tmp) {
// keep score (sparse array):
//word_arr[x] = tmp;
// simplified score order:
word_arr[count++] = tmp;
if (single_term) {
size += tmp.length;
if (size >= limit) {
// fast path optimization
break;
}
}
}
}
}
if (count) {
if (single_term) {
// fast path optimization
// offset was already applied at this point
return single_result(word_arr, limit, 0);
}
result[result.length] = word_arr;
return;
}
}
// return an empty array will stop the loop,
// to prevent stop when using suggestions return a false value
return !suggest && word_arr;
};
function single_result(result, limit, offset) {
if (1 === result.length) {
result = result[0];
} else {
result = concat(result);
}
return offset || result.length > limit ? result.slice(offset, offset + limit) : result;
}
function get_array(arr, term, keyword, bidirectional) {
if (keyword) {
// the frequency of the starting letter is slightly less
// on the last half of the alphabet (m-z) in almost every latin language,
// so we sort downwards (https://en.wikipedia.org/wiki/Letter_frequency)
const swap = bidirectional && term > keyword;
arr = arr[swap ? term : keyword];
arr = arr && arr[swap ? keyword : term];
} else {
arr = arr[term];
}
return arr;
}
Index.prototype.contain = function (id) {
return !!this.register[id];
};
Index.prototype.update = function (id, content) {
return this.remove(id).add(id, content);
};
/**
* @param {boolean=} _skip_deletion
*/
Index.prototype.remove = function (id, _skip_deletion) {
const refs = this.register[id];
if (refs) {
if (this.fastupdate) {
// fast updates performs really fast but did not fully cleanup the key entries
for (let i = 0, tmp; i < refs.length; i++) {
tmp = refs[i];
tmp.splice(tmp.indexOf(id), 1);
}
} else {
remove_index(this.map, id, this.resolution, this.optimize);
if (this.depth) {
remove_index(this.ctx, id, this.resolution_ctx, this.optimize);
}
}
_skip_deletion || delete this.register[id];
if (this.cache) {
this.cache.del(id);
}
}
return this;
};
/**
* @param map
* @param id
* @param res
* @param optimize
* @param {number=} resolution
* @return {number}
*/
function remove_index(map, id, res, optimize, resolution) {
let count = 0;
if (is_array(map)) {
// the first array is the score array in both strategies
if (!resolution) {
resolution = Math.min(map.length, res);
for (let x = 0, arr; x < resolution; x++) {
arr = map[x];
if (arr) {
count = remove_index(arr, id, res, optimize, resolution);
if (!optimize && !count) {
// when not memory optimized the score index should removed
delete map[x];
}
}
}
} else {
const pos = map.indexOf(id);
if (-1 !== pos) {
// fast path, when length is 1 or lower then the whole field gets deleted
if (1 < map.length) {
map.splice(pos, 1);
count++;
}
} else {
count++;
}
}
} else {
for (let key in map) {
count = remove_index(map[key], id, res, optimize, resolution);
if (!count) {
delete map[key];
}
}
}
return count;
}
Index.prototype.searchCache = searchCache;
Index.prototype.export = exportIndex;
Index.prototype.import = importIndex;
apply_async(Index.prototype);