// site/node_modules/flexsearch/dist/module-debug/index.js

/**!
 * FlexSearch.js
 * Author and Copyright: Thomas Wilkerling
 * Licence: Apache-2.0
 * Hosted by Nextapps GmbH
 * https://github.com/nextapps-de/flexsearch
 */

import { IndexInterface } from "./type.js";
import { encode as default_encoder } from "./lang/latin/default.js";
import { create_object, create_object_array, concat, sort_by_length_down, is_array, is_string, is_object, parse_option } from "./common.js";
import { pipeline, init_stemmer_or_matcher, init_filter } from "./lang.js";
import { global_lang, global_charset } from "./global.js";
import apply_async from "./async.js";
import { intersect } from "./intersect.js";
import Cache, { searchCache } from "./cache.js";
import apply_preset from "./preset.js";
import { exportIndex, importIndex } from "./serialize.js";

/**
 * Creates an index instance. May be called with or without `new`; both forms
 * produce an equivalent instance.
 *
 * Option fields read here: `charset`, `lang`, `context` (`depth`,
 * `bidirectional`, `resolution`), `encode`, `resolution`, `tokenize`,
 * `optimize`, `fastupdate`, `minlength`, `boost`, `rtl`, `matcher`,
 * `stemmer`, `filter` and `cache`.
 *
 * @constructor
 * @implements IndexInterface
 * @param {Object=} options index configuration (run through `apply_preset` when given)
 * @param {Object=} _register id register to use; a fresh one is created when omitted
 * @return {Index}
 */

function Index(options, _register) {

    if (!(this instanceof Index)) {

        // forward the register as well, otherwise a register handed in by the
        // caller would silently be replaced by a fresh one
        return new Index(options, _register);
    }

    let charset, lang, tmp;

    if (options) {

        options = apply_preset(options);

        charset = options.charset;
        lang = options.lang;

        if (is_string(charset)) {

            // a charset name without a variant gets the ":default" variant

            if (-1 === charset.indexOf(":")) {

                charset += ":default";
            }

            charset = global_charset[charset];
        }

        if (is_string(lang)) {

            lang = global_lang[lang];
        }
    } else {

        options = {};
    }

    let resolution,
        optimize,
        context = options.context || {};

    // an explicit encoder wins over the charset encoder, which wins over the default
    this.encode = options.encode || charset && charset.encode || default_encoder;
    this.register = _register || create_object();
    this.resolution = resolution = options.resolution || 9;
    // the charset tokenizer wins over the tokenize option; fallback is "strict"
    this.tokenize = tmp = charset && charset.tokenize || options.tokenize || "strict";
    // context depth is only kept for the "strict" tokenizer
    this.depth = "strict" === tmp && context.depth;
    this.bidirectional = parse_option(context.bidirectional, true);
    this.optimize = optimize = parse_option(options.optimize, true);
    this.fastupdate = parse_option(options.fastupdate, true);
    this.minlength = options.minlength || 1;
    this.boost = options.boost;

    // with "optimize" the score slots are pre-allocated as an array of
    // `resolution` entries, otherwise the index starts as a plain object

    this.map = optimize ? create_object_array(resolution) : create_object();
    this.resolution_ctx = resolution = context.resolution || 1;
    this.ctx = optimize ? create_object_array(resolution) : create_object();
    this.rtl = charset && charset.rtl || options.rtl;
    this.matcher = (tmp = options.matcher || lang && lang.matcher) && init_stemmer_or_matcher(tmp, false);
    this.stemmer = (tmp = options.stemmer || lang && lang.stemmer) && init_stemmer_or_matcher(tmp, true);
    this.filter = (tmp = options.filter || lang && lang.filter) && init_filter(tmp);

    this.cache = (tmp = options.cache) && new Cache(tmp);
}

// The Index constructor is the default export of this module.
export default Index;

// Left commented out: `pipeline` (imported from ./lang.js) is not attached to the prototype here.
//Index.prototype.pipeline = pipeline;

/**
 * Delegates to `add()` with the append flag set.
 *
 * @param {!number|string} id
 * @param {!string} content
 * @returns whatever `add()` returns
 */

Index.prototype.append = function (id, content) {

    const append_mode = true;

    return this.add(id, content, append_mode);
};

// TODO:
// string + number as text
// boolean, null, undefined as ?

/**
 * Encodes `content` into terms and writes every term (and, depending on the
 * tokenizer, its partials) into the index through `push_index`.
 *
 * When the id is already registered and neither `_append` nor `_skip_update`
 * is set, the call is forwarded to `update()`.
 * Nothing is indexed when `content` is falsy or `id` is falsy and not `0`.
 *
 * @param {!number|string} id
 * @param {!string} content
 * @param {boolean=} _append passed through to `push_index`; also bypasses the update path
 * @param {boolean=} _skip_update bypasses the update path for known ids
 * @returns {Index} this instance (or the result of `update()` on the update path)
 */

Index.prototype.add = function (id, content, _append, _skip_update) {

    if (content && (id || 0 === id)) {

        if (!_skip_update && !_append && this.register[id]) {

            return this.update(id, content);
        }

        content = this.encode("" + content);
        const length = content.length;

        if (length) {

            // check context dupes to skip all contextual redundancy along a document

            const dupes_ctx = create_object(),
                  dupes = create_object(),
                  depth = this.depth,
                  resolution = this.resolution;

            for (let i = 0; i < length; i++) {

                // with "rtl" the terms are consumed from the end of the content

                let term = content[this.rtl ? length - 1 - i : i],
                    // an encoder may yield null/undefined entries; treat them as
                    // empty so they get skipped below instead of throwing
                    term_length = term ? term.length : 0;

                // skip dupes will break the context chain

                if (term && term_length >= this.minlength && (depth || !dupes[term])) {

                    let score = get_score(resolution, length, i),
                        token = "";

                    switch (this.tokenize) {

                        case "full":

                            if (2 < term_length) {

                                // index every substring that satisfies minlength

                                for (let x = 0; x < term_length; x++) {

                                    for (let y = term_length; y > x; y--) {

                                        if (y - x >= this.minlength) {

                                            const partial_score = get_score(resolution, length, i, term_length, x);
                                            token = term.substring(x, y);
                                            this.push_index(dupes, token, partial_score, id, _append);
                                        }
                                    }
                                }

                                break;
                            }

                        // fallthrough to next case when term length < 3

                        case "reverse":

                            // skip last round (this token exist already in "forward")

                            if (1 < term_length) {

                                // build the suffixes by prepending one character per step

                                for (let x = term_length - 1; 0 < x; x--) {

                                    token = term[x] + token;

                                    if (token.length >= this.minlength) {

                                        const partial_score = get_score(resolution, length, i, term_length, x);
                                        this.push_index(dupes, token, partial_score, id, _append);
                                    }
                                }

                                token = "";
                            }

                        // fallthrough to next case to apply forward also

                        case "forward":

                            if (1 < term_length) {

                                // build the prefixes by appending one character per step

                                for (let x = 0; x < term_length; x++) {

                                    token += term[x];

                                    if (token.length >= this.minlength) {

                                        this.push_index(dupes, token, score, id, _append);
                                    }
                                }

                                break;
                            }

                        // fallthrough to next case when token has a length of 1

                        default:
                            // case "strict":

                            if (this.boost) {

                                // the boosted score is clamped to the last resolution slot

                                score = Math.min(0 | score / this.boost(content, term, i), resolution - 1);
                            }

                            this.push_index(dupes, term, score, id, _append);

                            // context is just supported by tokenizer "strict"

                            if (depth) {

                                if (1 < length && i < length - 1) {

                                    // check inner dupes to skip repeating words in the current context

                                    const dupes_inner = create_object(),
                                          resolution_ctx = this.resolution_ctx,
                                          keyword = term,
                                          size = Math.min(depth + 1, length - i);

                                    dupes_inner[keyword] = 1;

                                    for (let x = 1; x < size; x++) {

                                        term = content[this.rtl ? length - 1 - i - x : i + x];

                                        if (term && term.length >= this.minlength && !dupes_inner[term]) {

                                            dupes_inner[term] = 1;

                                            const context_score = get_score(resolution_ctx + (length / 2 > resolution_ctx ? 0 : 1), length, i, size - 1, x - 1),
                                                  // in bidirectional mode the pair is stored under the greater of both terms
                                                  swap = this.bidirectional && term > keyword;

                                            this.push_index(dupes_ctx, swap ? keyword : term, context_score, id, _append, swap ? term : keyword);
                                        }
                                    }
                                }
                            }
                    }
                }
            }

            // with fastupdate the register entry is written by push_index instead

            this.fastupdate || (this.register[id] = 1);
        }
    }

    return this;
};

/**
 * Maps the position of a term (and optionally of a partial within that term)
 * onto a score slot.
 *
 * @param {number} resolution number of available score slots
 * @param {number} length number of terms in the content
 * @param {number} i position of the term
 * @param {number=} term_length added to `length` when given
 * @param {number=} x added to `i` when given
 * @returns {number}
 */

function get_score(resolution, length, i, term_length, x) {

    // the first resolution slot is reserved for the best match, when a query
    // matches the first word(s); a resolution of "1" only has that slot

    if (!(i && 1 < resolution)) {

        return 0;
    }

    const span = length + (term_length || 0);
    const position = i + (x || 0);

    // do not stretch the resolution more than the span requires: the
    // position is used directly when everything fits into the slots

    if (span <= resolution) {

        return position;
    }

    // otherwise stretch the position over the whole resolution range,
    // shifted by one (slot 0 is reserved) and cut the floating point

    return 0 | (resolution - 1) / span * position + 1;
}

/**
 * Writes one id into the term index (`this.map`) or, when a `keyword` is
 * given, into the context index (`this.ctx`). Entries already recorded in
 * `dupes` are skipped.
 *
 * @private
 * @param dupes tracks what has already been written; updated by this call
 * @param value the term (or partial) to index
 * @param score the score slot to write into
 * @param id the document id
 * @param {boolean=} append when set, an id already present in the target list is not added again
 * @param {string=} keyword the context keyword; selects the context index
 */

Index.prototype.push_index = function (dupes, value, score, id, append, keyword) {

    const seen = dupes[value];

    // already handled, unless this keyword is new for the value

    if (seen && !(keyword && !seen[keyword])) {

        return;
    }

    let target = keyword ? this.ctx : this.map;

    // in optimized mode the score slot is the outermost level

    if (this.optimize) {

        target = target[score];
    }

    if (keyword) {

        const seen_keywords = seen || (dupes[value] = create_object());
        seen_keywords[keyword] = 1;

        target = target[keyword] || (target[keyword] = create_object());
    } else {

        dupes[value] = 1;
    }

    let ids = target[value] || (target[value] = []);

    // in non-optimized mode the score slot is the innermost level

    if (!this.optimize) {

        ids = ids[score] || (ids[score] = []);
    }

    if (append && ids.includes(id)) {

        return;
    }

    ids.push(id);

    // add a reference to the register for fast updates

    if (this.fastupdate) {

        const refs = this.register[id] || (this.register[id] = []);
        refs.push(ids);
    }
};

/**
 * Searches the index. Supported call shapes:
 * `search(query, limit)`, `search(query, options)`,
 * `search(query, limit, options)` and `search(options)`.
 *
 * Option fields read here: `query`, `limit`, `offset`, `context`, `suggest`.
 * A limit given in the options wins over the `limit` argument; when neither
 * is set the limit falls back to 100.
 *
 * @param {string|Object} query the query string, or the options object
 * @param {number|Object=} limit maximum number of results, or the options object
 * @param {Object=} options
 * @returns {Array<number|string>}
 */

Index.prototype.search = function (query, limit, options) {

    if (!options) {

        if (!limit && is_object(query)) {

            options = /** @type {Object} */query;
            query = options.query;
        } else if (is_object(limit)) {

            options = /** @type {Object} */limit;
        }
    }

    // an object in the limit slot carries options, it is never a numeric limit

    if (is_object(limit)) {

        limit = 0;
    }

    let result = [],
        length,
        context,
        suggest,
        offset = 0;

    if (options) {

        query = options.query || query;
        // keep an explicitly passed numeric limit when the options do not define one
        limit = options.limit || limit;
        offset = options.offset || 0;
        context = options.context;
        suggest = options.suggest;
    }

    if (query) {

        query = /** @type {Array} */this.encode("" + query);
        length = query.length;

        // TODO: solve this in one single loop below

        if (1 < length) {

            // drop terms which are too short as well as repeated terms

            const dupes = create_object(),
                  query_new = [];

            for (let i = 0, count = 0, term; i < length; i++) {

                term = query[i];

                if (term && term.length >= this.minlength && !dupes[term]) {

                    // this fast path can just apply when not in memory-optimized mode

                    if (!this.optimize && !suggest && !this.map[term]) {

                        // fast path "not found"

                        return result;
                    } else {

                        query_new[count++] = term;
                        dupes[term] = 1;
                    }
                }
            }

            query = query_new;
            length = query.length;
        }
    }

    if (!length) {

        return result;
    }

    limit || (limit = 100);

    // contextual search needs at least two terms and can be disabled per query

    let depth = this.depth && 1 < length && false !== context,
        index = 0,
        keyword;

    if (depth) {

        keyword = query[0];
        index = 1;
    } else {

        if (1 < length) {

            query.sort(sort_by_length_down);
        }
    }

    for (let arr, term; index < length; index++) {

        term = query[index];

        if (depth) {

            arr = this.add_result(result, suggest, limit, offset, 2 === length, term, keyword);

            // when suggestion enabled just forward keyword if term was found
            // as long as the result is empty forward the pointer also

            if (!suggest || false !== arr || !result.length) {

                keyword = term;
            }
        } else {

            arr = this.add_result(result, suggest, limit, offset, 1 === length, term);
        }

        // add_result hands back an array when the search is already finished

        if (arr) {

            return (/** @type {Array<number|string>} */arr
            );
        }

        // apply suggestions on last loop or fallback

        if (suggest && index === length - 1) {

            const result_count = result.length;

            if (!result_count) {

                if (depth) {

                    // fallback to non-contextual search when no result was found
                    // (the loop increment moves the index back to 0)

                    depth = 0;
                    index = -1;

                    continue;
                }

                return result;
            } else if (1 === result_count) {

                // fast path optimization

                return single_result(result[0], limit, offset);
            }
        }
    }

    return intersect(result, limit, offset, suggest);
};

/**
 * Collects the id lists of one term (or one keyword/term pair) from all score
 * slots and either appends them to `result` or resolves the search directly.
 *
 * Returns an array when the search is finished (the final ids for a single
 * term, or an empty array when nothing was found and suggestions are off),
 * returns `false` when nothing was found and suggestions are on, and returns
 * nothing when the collected lists were pushed to `result`.
 *
 * @private
 * @param {Array} result receives the collected lists of this term
 * @param {boolean=} suggest truthy when suggestions are enabled
 * @param {number} limit
 * @param {number} offset only applied when `single_term` is set
 * @param {boolean} single_term
 * @param {string} term
 * @param {string=} keyword selects the context index when given
 * @return {Array<Array<string|number>>|boolean|undefined}
 */

Index.prototype.add_result = function (result, suggest, limit, offset, single_term, term, keyword) {

    const word_arr = [];
    let slots = keyword ? this.ctx : this.map;

    // in non-optimized mode the term is resolved first and yields the score slots

    if (!this.optimize) {

        slots = get_array(slots, term, keyword, this.bidirectional);
    }

    if (slots) {

        const slot_count = Math.min(slots.length, keyword ? this.resolution_ctx : this.resolution);
        let size = 0;

        // relevance:
        for (let x = 0; x < slot_count; x++) {

            let ids = slots[x];

            if (!ids) {

                continue;
            }

            // in optimized mode the term is resolved within each score slot

            if (this.optimize) {

                ids = get_array(ids, term, keyword, this.bidirectional);
            }

            if (offset && ids && single_term) {

                const len = ids.length;

                if (len <= offset) {

                    // the whole list lies before the offset

                    offset -= len;
                    ids = null;
                } else {

                    ids = ids.slice(offset);
                    offset = 0;
                }
            }

            if (!ids) {

                continue;
            }

            // simplified score order (the slot number itself is not kept):
            word_arr.push(ids);

            if (single_term) {

                size += ids.length;

                if (size >= limit) {

                    // fast path optimization

                    break;
                }
            }
        }

        if (word_arr.length) {

            if (single_term) {

                // fast path optimization
                // offset was already applied at this point

                return single_result(word_arr, limit, 0);
            }

            result[result.length] = word_arr;
            return;
        }
    }

    // return an empty array will stop the loop,
    // to prevent stop when using suggestions return a false value

    return !suggest && word_arr;
};

/**
 * Flattens the collected id lists of a single term and applies the
 * limit/offset window.
 *
 * @param {Array<Array<number|string>>} result lists of ids
 * @param {number} limit
 * @param {number} offset
 * @returns {Array<number|string>} the only list itself when no slicing is needed
 */

function single_result(result, limit, offset) {

    // a single list needs no concatenation

    const flat = 1 === result.length ? result[0] : concat(result);

    if (!offset && !(flat.length > limit)) {

        return flat;
    }

    return flat.slice(offset, offset + limit);
}

/**
 * Looks up the entry of a term, or of a keyword/term pair when a keyword
 * is given.
 *
 * @param arr the container to look into
 * @param {string} term
 * @param {string=} keyword
 * @param {boolean=} bidirectional when truthy the pair is looked up under the greater of both strings
 * @returns the stored entry, or a falsy value when nothing is stored
 */

function get_array(arr, term, keyword, bidirectional) {

    if (!keyword) {

        return arr[term];
    }

    // the frequency of the starting letter is slightly less
    // on the last half of the alphabet (m-z) in almost every latin language,
    // so we sort downwards (https://en.wikipedia.org/wiki/Letter_frequency)

    const swap = bidirectional && term > keyword;
    const outer_key = swap ? term : keyword;
    const inner_key = swap ? keyword : term;
    const outer = arr[outer_key];

    return outer && outer[inner_key];
}

/**
 * Tells whether the register holds a truthy entry for the id.
 *
 * @param {number|string} id
 * @returns {boolean}
 */

Index.prototype.contain = function (id) {

    const entry = this.register[id];

    return Boolean(entry);
};

/**
 * Replaces the content of an id: removes the id first, then adds the content
 * on whatever `remove()` returned.
 *
 * @param {number|string} id
 * @param {string} content
 * @returns the result of `add()`
 */

Index.prototype.update = function (id, content) {

    const index = this.remove(id);

    return index.add(id, content);
};

/**
 * Removes an id from the index. Does nothing when the register has no
 * truthy entry for the id.
 *
 * With `fastupdate` the register entry is the list of id arrays which were
 * recorded by `push_index`; the id is spliced out of each of them. Otherwise
 * the term index (and the context index when `depth` is set) is walked via
 * `remove_index`.
 *
 * @param {number|string} id
 * @param {boolean=} _skip_deletion keep the register entry of the id
 * @returns {Index} this instance
 */

Index.prototype.remove = function (id, _skip_deletion) {

    const refs = this.register[id];

    if (refs) {

        if (this.fastupdate) {

            // fast updates performs really fast but did not fully cleanup the key entries

            for (let i = 0, tmp, pos; i < refs.length; i++) {

                tmp = refs[i];
                pos = tmp.indexOf(id);

                // a missing id must be skipped: splice(-1, 1) would remove the
                // last entry of the list, which belongs to a different id

                if (-1 !== pos) {

                    tmp.splice(pos, 1);
                }
            }
        } else {

            remove_index(this.map, id, this.resolution, this.optimize);

            if (this.depth) {

                remove_index(this.ctx, id, this.resolution_ctx, this.optimize);
            }
        }

        _skip_deletion || delete this.register[id];

        if (this.cache) {

            this.cache.del(id);
        }
    }

    return this;
};

/**
 * Recursively removes an id from a (nested) index structure and prunes
 * entries which became empty.
 *
 * The return value tells the caller whether anything is left below `map`:
 * `0` means the entry can be deleted by its parent. The counts of all
 * children are summed up, so a parent is only deleted when every child is
 * gone (not merely the child that happened to be visited last).
 *
 * @param map the structure to clean up (array of score slots, id list or keyed object)
 * @param id the id to remove
 * @param res the resolution used to limit the number of visited score slots
 * @param optimize when truthy, emptied score slots are kept in place
 * @param {number=} resolution set once the score array was passed; arrays below are id lists
 * @return {number} 0 when nothing is left below `map`
 */

function remove_index(map, id, res, optimize, resolution) {

    let count = 0;

    if (is_array(map)) {

        // the first array is the score array in both strategies

        if (!resolution) {

            resolution = Math.min(map.length, res);

            for (let x = 0; x < resolution; x++) {

                const arr = map[x];

                if (arr) {

                    const remaining = remove_index(arr, id, res, optimize, resolution);

                    if (remaining) {

                        count += remaining;
                    } else if (!optimize) {

                        // when not memory optimized the score index should removed

                        delete map[x];
                    }
                }
            }
        } else {

            const pos = map.indexOf(id);

            if (-1 !== pos) {

                // fast path, when length is 1 or lower then the whole field gets deleted
                // (the list is left untouched and reported as empty to the parent)

                if (1 < map.length) {

                    map.splice(pos, 1);
                    count++;
                }
            } else {

                // the id is not part of this list, so the list has to stay

                count++;
            }
        }
    } else {

        for (const key in map) {

            const remaining = remove_index(map[key], id, res, optimize, resolution);

            if (remaining) {

                count += remaining;
            } else {

                delete map[key];
            }
        }
    }

    return count;
}

// Attach the cached search implementation imported from ./cache.js.
Index.prototype.searchCache = searchCache;


// Attach the export/import implementations imported from ./serialize.js.
Index.prototype.export = exportIndex;
Index.prototype.import = importIndex;


// Hand the prototype to the helper from ./async.js.
// NOTE(review): presumably this adds the async method variants — confirm in ./async.js.
apply_async(Index.prototype);