/** * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 0.3.0 * Copyright (C) 2013 Oliver Nightingale * MIT Licensed * @license */ /** * Convenience function for instantiating a new lunr index and configuring it * with the default pipeline functions and the passed config function. * * When using this convenience function a new index will be created with the * following functions already in the pipeline: * * lunr.StopWordFilter - filters out any stop words before they enter the * index * * lunr.stemmer - stems the tokens before entering the index. * * Example: * * var idx = lunr(function () { * this.field('title', 10) * this.field('tags', 100) * this.field('body') * * this.ref('cid') * * this.pipeline.add(function () { * // some custom pipeline function * }) * * }) * * @param {Function} config A function that will be called with the new instance * of the lunr.Index as both its context and first parameter. It can be used to * customize the instance of new lunr.Index. * @namespace * @module * @returns {lunr.Index} * */ var lunr = function (config) { var idx = new lunr.Index idx.pipeline.add(lunr.stopWordFilter, lunr.stemmer) if (config) config.call(idx, idx) return idx } lunr.version = "0.3.0" if (typeof module !== 'undefined') { module.exports = lunr } /*! * lunr.tokenizer * Copyright (C) 2013 Oliver Nightingale */ /** * A function for splitting a string into tokens ready to be inserted into * the search index. * * @module * @param {String} str The string to convert into tokens * @returns {Array} */ lunr.tokenizer = function (str) { if (Array.isArray(str)) return str var str = str.replace(/^\s+/, '') for (var i = str.length - 1; i >= 0; i--) { if (/\S/.test(str.charAt(i))) { str = str.substring(0, i + 1) break } } return str .split(/\s+/) .map(function (token) { return token.replace(/^\W+/, '').replace(/\W+$/, '').toLowerCase() }) } /*! 
* lunr.Pipeline * Copyright (C) 2013 Oliver Nightingale */ /** * lunr.Pipelines maintain an ordered list of functions to be applied to all * tokens in documents entering the search index and queries being ran against * the index. * * An instance of lunr.Index created with the lunr shortcut will contain a * pipeline with a stop word filter and an English language stemmer. Extra * functions can be added before or after either of these functions or these * default functions can be removed. * * When run the pipeline will call each function in turn, passing a token, the * index of that token in the original list of all tokens and finally a list of * all the original tokens. * * The output of functions in the pipeline will be passed to the next function * in the pipeline. To exclude a token from entering the index the function * should return undefined, the rest of the pipeline will not be called with * this token. * * For serialisation of pipelines to work, all functions used in an instance of * a pipeline should be registered with lunr.Pipeline. Registered functions can * then be loaded. If trying to load a serialised pipeline that uses functions * that are not registered an error will be thrown. * * If not planning on serialising the pipeline then registering pipeline functions * is not necessary. * * @constructor */ lunr.Pipeline = function () { this._stack = [] } lunr.Pipeline.registeredFunctions = {} /** * Register a function with the pipeline. * * Functions that are used in the pipeline should be registered if the pipeline * needs to be serialised, or a serialised pipeline needs to be loaded. * * Registering a function does not add it to a pipeline, functions must still be * added to instances of the pipeline for them to be used when running a pipeline. * * @param {Function} fn The function to check for. 
* @param {String} label The label to register this function with * @memberOf Pipeline */ lunr.Pipeline.registerFunction = function (fn, label) { if (console && console.warn && (label in this.registeredFunctions)) { console.warn('Overwriting existing registered function: ' + label) } fn.label = label lunr.Pipeline.registeredFunctions[fn.label] = fn } /** * Warns if the function is not registered as a Pipeline function. * * @param {Function} fn The function to check for. * @private * @memberOf Pipeline */ lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { var isRegistered = fn.label && (fn.label in this.registeredFunctions) if (!isRegistered && console && console.warn) { console.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn) } } /** * Loads a previously serialised pipeline. * * All functions to be loaded must already be registered with lunr.Pipeline. * If any function from the serialised data has not been registered then an * error will be thrown. * * @param {Object} serialised The serialised pipeline to load. * @returns {lunr.Pipeline} * @memberOf Pipeline */ lunr.Pipeline.load = function (serialised) { var pipeline = new lunr.Pipeline serialised.forEach(function (fnName) { var fn = lunr.Pipeline.registeredFunctions[fnName] if (fn) { pipeline.add(fn) } else { throw new Error ('Cannot load un-registered function: ' + fnName) } }) return pipeline } /** * Adds new functions to the end of the pipeline. * * Logs a warning if the function has not been registered. * * @param {Function} functions Any number of functions to add to the pipeline. * @memberOf Pipeline */ lunr.Pipeline.prototype.add = function () { var fns = Array.prototype.slice.call(arguments) fns.forEach(function (fn) { lunr.Pipeline.warnIfFunctionNotRegistered(fn) this._stack.push(fn) }, this) } /** * Adds a single function after a function that already exists in the * pipeline. * * Logs a warning if the function has not been registered. 
* * @param {Function} existingFn A function that already exists in the pipeline. * @param {Function} newFn The new function to add to the pipeline. * @memberOf Pipeline */ lunr.Pipeline.prototype.after = function (existingFn, newFn) { lunr.Pipeline.warnIfFunctionNotRegistered(newFn) var pos = this._stack.indexOf(existingFn) + 1 this._stack.splice(pos, 0, newFn) } /** * Adds a single function before a function that already exists in the * pipeline. * * Logs a warning if the function has not been registered. * * @param {Function} existingFn A function that already exists in the pipeline. * @param {Function} newFn The new function to add to the pipeline. * @memberOf Pipeline */ lunr.Pipeline.prototype.before = function (existingFn, newFn) { lunr.Pipeline.warnIfFunctionNotRegistered(newFn) var pos = this._stack.indexOf(existingFn) this._stack.splice(pos, 0, newFn) } /** * Removes a function from the pipeline. * * @param {Function} fn The function to remove from the pipeline. * @memberOf Pipeline */ lunr.Pipeline.prototype.remove = function (fn) { var pos = this._stack.indexOf(fn) this._stack.splice(pos, 1) } /** * Runs the current list of functions that make up the pipeline against the * passed tokens. * * @param {Array} tokens The tokens to run through the pipeline. * @returns {Array} * @memberOf Pipeline */ lunr.Pipeline.prototype.run = function (tokens) { var out = [], tokenLength = tokens.length, stackLength = this._stack.length for (var i = 0; i < tokenLength; i++) { var token = tokens[i] for (var j = 0; j < stackLength; j++) { token = this._stack[j](token, i, tokens) if (token === void 0) break }; if (token !== void 0) out.push(token) }; return out } /** * Returns a representation of the pipeline ready for serialisation. * * Logs a warning if the function has not been registered. 
* * @returns {Array} * @memberOf Pipeline */ lunr.Pipeline.prototype.toJSON = function () { return this._stack.map(function (fn) { lunr.Pipeline.warnIfFunctionNotRegistered(fn) return fn.label }) } /*! * lunr.Vector * Copyright (C) 2013 Oliver Nightingale */ /** * lunr.Vectors wrap arrays and add vector related operations for the array * elements. * * @constructor * @param {Array} elements Elements that make up the vector. */ lunr.Vector = function (elements) { this.elements = elements for (var i = 0; i < elements.length; i++) { if (!(i in this.elements)) this.elements[i] = 0 } } /** * Calculates the magnitude of this vector. * * @returns {Number} * @memberOf Vector */ lunr.Vector.prototype.magnitude = function () { if (this._magnitude) return this._magnitude var sumOfSquares = 0, elems = this.elements, len = elems.length, el for (var i = 0; i < len; i++) { el = elems[i] sumOfSquares += el * el }; return this._magnitude = Math.sqrt(sumOfSquares) } /** * Calculates the dot product of this vector and another vector. * * @param {lunr.Vector} otherVector The vector to compute the dot product with. * @returns {Number} * @memberOf Vector */ lunr.Vector.prototype.dot = function (otherVector) { var elem1 = this.elements, elem2 = otherVector.elements, length = elem1.length, dotProduct = 0 for (var i = 0; i < length; i++) { dotProduct += elem1[i] * elem2[i] }; return dotProduct } /** * Calculates the cosine similarity between this vector and another * vector. * * @param {lunr.Vector} otherVector The other vector to calculate the * similarity with. * @returns {Number} * @memberOf Vector */ lunr.Vector.prototype.similarity = function (otherVector) { return this.dot(otherVector) / (this.magnitude() * otherVector.magnitude()) } /** * Converts this vector back into an array. * * @returns {Array} * @memberOf Vector */ lunr.Vector.prototype.toArray = function () { return this.elements } /*! 
* lunr.SortedSet * Copyright (C) 2013 Oliver Nightingale */ /** * lunr.SortedSets are used to maintain an array of uniq values in a sorted * order. * * @constructor */ lunr.SortedSet = function () { this.length = 0 this.elements = [] } /** * Loads a previously serialised sorted set. * * @param {Array} serialisedData The serialised set to load. * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.load = function (serialisedData) { var set = new this set.elements = serialisedData set.length = serialisedData.length return set } /** * Inserts new items into the set in the correct position to maintain the * order. * * @param {Object} The objects to add to this set. * @memberOf SortedSet */ lunr.SortedSet.prototype.add = function () { Array.prototype.slice.call(arguments).forEach(function (element) { if (~this.indexOf(element)) return this.elements.splice(this.locationFor(element), 0, element) }, this) this.length = this.elements.length } /** * Converts this sorted set into an array. * * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.toArray = function () { return this.elements.slice() } /** * Creates a new array with the results of calling a provided function on every * element in this sorted set. * * Delegates to Array.prototype.map and has the same signature. * * @param {Function} fn The function that is called on each element of the * set. * @param {Object} ctx An optional object that can be used as the context * for the function fn. * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.map = function (fn, ctx) { return this.elements.map(fn, ctx) } /** * Executes a provided function once per sorted set element. * * Delegates to Array.prototype.forEach and has the same signature. * * @param {Function} fn The function that is called on each element of the * set. * @param {Object} ctx An optional object that can be used as the context * @memberOf SortedSet * for the function fn. 
*/ lunr.SortedSet.prototype.forEach = function (fn, ctx) { return this.elements.forEach(fn, ctx) } /** * Returns the index at which a given element can be found in the * sorted set, or -1 if it is not present. * * @param {Object} elem The object to locate in the sorted set. * @param {Number} start An optional index at which to start searching from * within the set. * @param {Number} end An optional index at which to stop search from within * the set. * @returns {Number} * @memberOf SortedSet */ lunr.SortedSet.prototype.indexOf = function (elem, start, end) { var start = start || 0, end = end || this.elements.length, sectionLength = end - start, pivot = start + Math.floor(sectionLength / 2), pivotElem = this.elements[pivot] if (sectionLength <= 1) { if (pivotElem === elem) { return pivot } else { return -1 } } if (pivotElem < elem) return this.indexOf(elem, pivot, end) if (pivotElem > elem) return this.indexOf(elem, start, pivot) if (pivotElem === elem) return pivot } /** * Returns the position within the sorted set that an element should be * inserted at to maintain the current order of the set. * * This function assumes that the element to search for does not already exist * in the sorted set. * * @param {Object} elem The elem to find the position for in the set * @param {Number} start An optional index at which to start searching from * within the set. * @param {Number} end An optional index at which to stop search from within * the set. 
* @returns {Number} * @memberOf SortedSet */ lunr.SortedSet.prototype.locationFor = function (elem, start, end) { var start = start || 0, end = end || this.elements.length, sectionLength = end - start, pivot = start + Math.floor(sectionLength / 2), pivotElem = this.elements[pivot] if (sectionLength <= 1) { if (pivotElem > elem) return pivot if (pivotElem < elem) return pivot + 1 } if (pivotElem < elem) return this.locationFor(elem, pivot, end) if (pivotElem > elem) return this.locationFor(elem, start, pivot) } /** * Creates a new lunr.SortedSet that contains the elements in the intersection * of this set and the passed set. * * @param {lunr.SortedSet} otherSet The set to intersect with this set. * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.intersect = function (otherSet) { var intersectSet = new lunr.SortedSet, i = 0, j = 0, a_len = this.length, b_len = otherSet.length, a = this.elements, b = otherSet.elements while (true) { if (i > a_len - 1 || j > b_len - 1) break if (a[i] === b[j]) { intersectSet.add(a[i]) i++, j++ continue } if (a[i] < b[j]) { i++ continue } if (a[i] > b[j]) { j++ continue } }; return intersectSet } /** * Makes a copy of this set * * @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.clone = function () { var clone = new lunr.SortedSet clone.elements = this.toArray() clone.length = clone.elements.length return clone } /** * Creates a new lunr.SortedSet that contains the elements in the union * of this set and the passed set. * * @param {lunr.SortedSet} otherSet The set to union with this set. 
* @returns {lunr.SortedSet} * @memberOf SortedSet */ lunr.SortedSet.prototype.union = function (otherSet) { var longSet, shortSet, unionSet if (this.length >= otherSet.length) { longSet = this, shortSet = otherSet } else { longSet = otherSet, shortSet = this } unionSet = longSet.clone() unionSet.add.apply(unionSet, shortSet.toArray()) return unionSet } /** * Returns a representation of the sorted set ready for serialisation. * * @returns {Array} * @memberOf SortedSet */ lunr.SortedSet.prototype.toJSON = function () { return this.toArray() } /*! * lunr.Index * Copyright (C) 2013 Oliver Nightingale */ /** * lunr.Index is object that manages a search index. It contains the indexes * and stores all the tokens and document lookups. It also provides the main * user facing API for the library. * * @constructor */ lunr.Index = function () { this._fields = [] this._ref = 'id' this.pipeline = new lunr.Pipeline this.documentStore = new lunr.Store this.tokenStore = new lunr.TokenStore this.corpusTokens = new lunr.SortedSet } /** * Loads a previously serialised index. * * Issues a warning if the index being imported was serialised * by a different version of lunr. * * @param {Object} serialisedData The serialised set to load. * @returns {lunr.Index} * @memberOf Index */ lunr.Index.load = function (serialisedData) { if (serialisedData.version !== lunr.version && console && console.warn) { console.warn('version mismatch: current ' + lunr.version + ' importing ' + serialisedData.version) } var idx = new this idx._fields = serialisedData.fields idx._ref = serialisedData.ref idx.documentStore = lunr.Store.load(serialisedData.documentStore) idx.tokenStore = lunr.TokenStore.load(serialisedData.tokenStore) idx.corpusTokens = lunr.SortedSet.load(serialisedData.corpusTokens) idx.pipeline = lunr.Pipeline.load(serialisedData.pipeline) return idx } /** * Adds a field to the list of fields that will be searchable within documents * in the index. 
* * An optional boost param can be passed to affect how much tokens in this field * rank in search results, by default the boost value is 1. * * Fields should be added before any documents are added to the index, fields * that are added after documents are added to the index will only apply to new * documents added to the index. * * @param {String} fieldName The name of the field within the document that * should be indexed * @param {Number} boost An optional boost that can be applied to terms in this * field. * @returns {lunr.Index} * @memberOf Index */ lunr.Index.prototype.field = function (fieldName, opts) { var opts = opts || {}, field = { name: fieldName, boost: opts.boost || 1 } this._fields.push(field) return this } /** * Sets the property used to uniquely identify documents added to the index, * by default this property is 'id'. * * This should only be changed before adding documents to the index, changing * the ref property without resetting the index can lead to unexpected results. * * @param {String} refName The property to use to uniquely identify the * documents in the index. * @returns {lunr.Index} * @memberOf Index */ lunr.Index.prototype.ref = function (refName) { this._ref = refName return this } /** * Add a document to the index. * * This is the way new documents enter the index, this function will run the * fields from the document through the index's pipeline and then add it to * the index, it will then show up in search results. * * @param {Object} doc The document to add to the index. 
* @memberOf Index */ lunr.Index.prototype.add = function (doc) { var docTokens = {}, allDocumentTokens = new lunr.SortedSet, docRef = doc[this._ref] this._fields.forEach(function (field) { var fieldTokens = this.pipeline.run(lunr.tokenizer(doc[field.name])) docTokens[field.name] = fieldTokens lunr.SortedSet.prototype.add.apply(allDocumentTokens, fieldTokens) }, this) this.documentStore.set(docRef, allDocumentTokens) lunr.SortedSet.prototype.add.apply(this.corpusTokens, allDocumentTokens.toArray()) for (var i = 0; i < allDocumentTokens.length; i++) { var token = allDocumentTokens.elements[i] var tf = this._fields.reduce(function (memo, field) { var tokenCount = docTokens[field.name].filter(function (t) { return t === token }).length, fieldLength = docTokens[field.name].length return memo + (tokenCount / fieldLength * field.boost) }, 0) this.tokenStore.add(token, { ref: docRef, tf: tf }) }; } /** * Removes a document from the index. * * To make sure documents no longer show up in search results they can be * removed from the index using this method. * * The document passed only needs to have the same ref property value as the * document that was added to the index, they could be completely different * objects. * * @param {Object} doc The document to remove from the index. * @memberOf Index */ lunr.Index.prototype.remove = function (doc) { var docRef = doc[this._ref], docTokens = this.documentStore.get(docRef) this.documentStore.remove(docRef) docTokens.forEach(function (token) { this.tokenStore.remove(token, docRef) }, this) } /** * Updates a document in the index. * * When a document contained within the index gets updated, fields changed, * added or removed, to make sure it correctly matched against search queries, * it should be updated in the index. * * This method is just a wrapper around `remove` and `add` * * @param {Object} doc The document to update in the index. 
* @see Index.prototype.remove * @see Index.prototype.add * @memberOf Index */ lunr.Index.prototype.update = function (doc) { this.remove(doc) this.add(doc) } /** * Calculates the inverse document frequency for a token within the index. * * @param {String} token The token to calculate the idf of. * @see Index.prototype.idf * @private * @memberOf Index */ lunr.Index.prototype.idf = function (term) { var documentFrequency = Object.keys(this.tokenStore.get(term)).length if (documentFrequency === 0) { return 1 } else { return 1 + Math.log(this.tokenStore.length / documentFrequency) } } /** * Searches the index using the passed query. * * Queries should be a string, multiple words are allowed and will lead to an * AND based query, e.g. `idx.search('foo bar')` will run a search for * documents containing both 'foo' and 'bar'. * * All query tokens are passed through the same pipeline that document tokens * are passed through, so any language processing involved will be run on every * query term. * * Each query term is expanded, so that the term 'he' might be expanded to * 'hello' and 'help' if those terms were already included in the index. * * Matching documents are returned as an array of objects, each object contains * the matching document ref, as set for this index, and the similarity score * for this document against the query. * * @param {String} query The query to search the index with. 
* @returns {Object} * @see Index.prototype.idf * @see Index.prototype.documentVector * @memberOf Index */ lunr.Index.prototype.search = function (query) { var queryTokens = this.pipeline.run(lunr.tokenizer(query)), queryArr = new Array (this.corpusTokens.length), documentSets = [], fieldBoosts = this._fields.reduce(function (memo, f) { return memo + f.boost }, 0) var hasSomeToken = queryTokens.some(function (token) { return this.tokenStore.has(token) }, this) if (!hasSomeToken) return [] queryTokens .forEach(function (token, i, tokens) { var tf = 1 / tokens.length * this._fields.length * fieldBoosts, self = this var set = this.tokenStore.expand(token).reduce(function (memo, key) { var pos = self.corpusTokens.indexOf(key), idf = self.idf(key), exactMatchBoost = (key === token ? 10 : 1), set = new lunr.SortedSet // calculate the query tf-idf score for this token // applying an exactMatchBoost to ensure these rank // higher than expanded terms if (pos > -1) queryArr[pos] = tf * idf * exactMatchBoost // add all the documents that have this key into a set Object.keys(self.tokenStore.get(key)).forEach(function (ref) { set.add(ref) }) return memo.union(set) }, new lunr.SortedSet) documentSets.push(set) }, this) var documentSet = documentSets.reduce(function (memo, set) { return memo.intersect(set) }) var queryVector = new lunr.Vector (queryArr) return documentSet .map(function (ref) { return { ref: ref, score: queryVector.similarity(this.documentVector(ref)) } }, this) .sort(function (a, b) { return b.score - a.score }) } /** * Generates a vector containing all the tokens in the document matching the * passed documentRef. * * The vector contains the tf-idf score for each token contained in the * document with the passed documentRef. The vector will contain an element * for every token in the indexes corpus, if the document does not contain that * token the element will be 0. * * @param {Object} documentRef The ref to find the document with. 
* @returns {lunr.Vector} * @private * @memberOf Index */ lunr.Index.prototype.documentVector = function (documentRef) { var documentTokens = this.documentStore.get(documentRef), documentTokensLength = documentTokens.length, documentArr = new Array (this.corpusTokens.length) for (var i = 0; i < documentTokensLength; i++) { var token = documentTokens.elements[i], tf = this.tokenStore.get(token)[documentRef].tf, idf = this.idf(token) documentArr[this.corpusTokens.indexOf(token)] = tf * idf }; return new lunr.Vector (documentArr) } /** * Returns a representation of the index ready for serialisation. * * @returns {Object} * @memberOf Index */ lunr.Index.prototype.toJSON = function () { return { version: lunr.version, fields: this._fields, ref: this._ref, documentStore: this.documentStore.toJSON(), tokenStore: this.tokenStore.toJSON(), corpusTokens: this.corpusTokens.toJSON(), pipeline: this.pipeline.toJSON() } } /*! * lunr.Store * Copyright (C) 2013 Oliver Nightingale */ /** * lunr.Store is a simple key-value store used for storing sets of tokens for * documents stored in index. * * @constructor * @module */ lunr.Store = function () { this.store = {} this.length = 0 } /** * Loads a previously serialised store * * @param {Object} serialisedData The serialised store to load. * @returns {lunr.Store} * @memberOf Store */ lunr.Store.load = function (serialisedData) { var store = new this store.length = serialisedData.length store.store = Object.keys(serialisedData.store).reduce(function (memo, key) { memo[key] = lunr.SortedSet.load(serialisedData.store[key]) return memo }, {}) return store } /** * Stores the given tokens in the store against the given id. * * @param {Object} id The key used to store the tokens against. * @param {Object} tokens The tokens to store against the key. 
* @memberOf Store */ lunr.Store.prototype.set = function (id, tokens) { this.store[id] = tokens this.length = Object.keys(this.store).length } /** * Retrieves the tokens from the store for a given key. * * @param {Object} id The key to lookup and retrieve from the store. * @returns {Object} * @memberOf Store */ lunr.Store.prototype.get = function (id) { return this.store[id] } /** * Checks whether the store contains a key. * * @param {Object} id The id to look up in the store. * @returns {Boolean} * @memberOf Store */ lunr.Store.prototype.has = function (id) { return id in this.store } /** * Removes the value for a key in the store. * * @param {Object} id The id to remove from the store. * @memberOf Store */ lunr.Store.prototype.remove = function (id) { if (!this.has(id)) return delete this.store[id] this.length-- } /** * Returns a representation of the store ready for serialisation. * * @returns {Object} * @memberOf Store */ lunr.Store.prototype.toJSON = function () { return { store: this.store, length: this.length } } /*! 
* lunr.stemmer
 * Copyright (C) 2013 Oliver Nightingale
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * Words shorter than three characters are returned unchanged. All other words
 * are run through the numbered steps below, each of which rewrites or strips
 * a family of suffixes.
 *
 * @module
 * @param {String} str The string to stem
 * @returns {String}
 * @see lunr.Pipeline
 */
lunr.stemmer = (function(){
  // suffix -> replacement lookups used by step 2 and step 3
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    // regular expression source fragments, combined into the patterns below
    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  return function (w) {
    var   stem,
      suffix,
      firstch,
      re,
      re2,
      re3,
      re4;

    if (w.length < 3) { return w; }

    // an initial "y" is upper-cased so that the vowel class (which lists a
    // lower-case y) does not match it; it is lower-cased again at the end
    firstch = w.substr(0,1);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a
    // "sses" -> "ss", "ies" -> "i"; otherwise drop a final "s" that is not
    // preceded by another "s"
    re = /^(.+?)(ss|i)es$/;
    re2 = /^(.+?)([^s])s$/;

    if (re.test(w)) { w = w.replace(re,"$1$2"); }
    else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }

    // Step 1b
    // "eed" loses its last letter when the part before it matches mgr0;
    // "ed"/"ing" are removed when the remaining stem contains a vowel, after
    // which the stem end is tidied up (see the three cases below)
    re = /^(.+?)eed$/;
    re2 = /^(.+?)(ed|ing)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = new RegExp(mgr0);
      if (re.test(fp[1])) {
        re = /.$/;
        w = w.replace(re,"");
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = new RegExp(s_v);
      if (re2.test(stem)) {
        w = stem;
        re2 = /(at|bl|iz)$/;
        re3 = new RegExp("([^aeiouylsz])\\1$");
        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
        // ends in at/bl/iz: append "e"
        if (re2.test(w)) {  w = w + "e"; }
        // ends in a doubled letter (other than a vowel, y, l, s or z): drop one
        else if (re3.test(w)) { re = /.$/; w = w.replace(re,""); }
        // whole word is consonant-sequence + vowel + one letter that is not a
        // vowel, w, x or y: append "e"
        else if (re4.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c
    // a final "y" becomes "i" when the rest of the word contains a vowel
    re = /^(.+?)y$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(s_v);
      if (re.test(stem)) { w = stem + "i"; }
    }

    // Step 2
    // replace the suffix via step2list when the stem matches mgr0
    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    // replace the suffix via step3list when the stem matches mgr0
    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = new RegExp(mgr0);
      if (re.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    // strip the suffix entirely when the stem matches mgr1; "ion" is only
    // stripped after "s" or "t" (which stay part of the stem)
    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    re2 = /^(.+?)(s|t)(ion)$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      if (re.test(stem)) {
        w = stem;
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = new RegExp(mgr1);
      if (re2.test(stem)) {
        w = stem;
      }
    }

    // Step 5
    // drop a final "e" when the stem matches mgr1, or matches meq1 without
    // matching the re3 pattern; then reduce a final "ll" to "l" when the word
    // matches mgr1
    re = /^(.+?)e$/;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = new RegExp(mgr1);
      re2 = new RegExp(meq1);
      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
        w = stem;
      }
    }

    re = /ll$/;
    re2 = new RegExp(mgr1);
    if (re.test(w) && re2.test(w)) {
      re = /.$/;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  }
})();

// registered so that pipelines containing the stemmer can be serialised
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
/*!
 * lunr.stopWordFilter
 * Copyright (C) 2013 Oliver Nightingale
 */

/**
 * lunr.stopWordFilter is an English language stop word list filter, any words
 * contained in the list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @returns {String}
 * @see lunr.Pipeline
 */
lunr.stopWordFilter = function (token) {
  // no explicit return for stop words, so the result is undefined for them
  if (lunr.stopWordFilter.stopWords.indexOf(token) === -1) return token
}

// The stop words live in a lunr.SortedSet whose backing array is assigned
// directly, so the list below has to be kept in sorted order and `length`
// has to be kept in sync with the number of entries by hand.
lunr.stopWordFilter.stopWords = new lunr.SortedSet
lunr.stopWordFilter.stopWords.length = 119
lunr.stopWordFilter.stopWords.elements = [
  "a",
  "able",
  "about",
  "across",
  "after",
  "all",
  "almost",
  "also",
  "am",
  "among",
  "an",
  "and",
  "any",
  "are",
  "as",
  "at",
  "be",
  "because",
  "been",
  "but",
  "by",
  "can",
  "cannot",
  "could",
  "dear",
  "did",
  "do",
  "does",
  "either",
  "else",
  "ever",
  "every",
  "for",
  "from",
  "get",
  "got",
  "had",
  "has",
  "have",
  "he",
  "her",
  "hers",
  "him",
  "his",
  "how",
  "however",
  "i",
  "if",
  "in",
  "into",
  "is",
  "it",
  "its",
  "just",
  "least",
  "let",
  "like",
  "likely",
  "may",
  "me",
  "might",
  "most",
  "must",
  "my",
  "neither",
  "no",
  "nor",
  "not",
  "of",
  "off",
  "often",
  "on",
  "only",
  "or",
  "other",
  "our",
  "own",
  "rather",
  "said",
  "say",
  "says",
  "she",
  "should",
  "since",
  "so",
  "some",
  "than",
  "that",
  "the",
  "their",
  "them",
  "then",
  "there",
  "these",
  "they",
  "this",
  "tis",
  "to",
  "too",
  "twas",
  "us",
  "wants",
  "was",
  "we",
  "were",
  "what",
  "when",
  "where",
  "which",
  "while",
  "who",
  "whom",
  "why",
  "will",
  "with",
  "would",
  "yet",
  "you",
  "your"
]

// registered so that pipelines containing the filter can be serialised
lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
/*!
 * lunr.TokenStore
 * Copyright (C) 2013 Oliver Nightingale
 */

/**
 * lunr.TokenStore is used for efficient storing and lookup of the reverse
 * index of token to document ref.
 *
 * Tokens are stored in a tree of nested objects: each node has a `docs`
 * object plus one child node per following character.
 *
 * @constructor
 */
lunr.TokenStore = function () {
  this.root = { docs: {} }
  this.length = 0
}

/**
 * Loads a previously serialised token store
 *
 * The serialised root object is used as-is.
 *
 * @param {Object} serialisedData The serialised token store to load.
 * @returns {lunr.TokenStore}
 * @memberOf TokenStore
 */
lunr.TokenStore.load = function (serialisedData) {
  var store = new this

  store.root = serialisedData.root
  store.length = serialisedData.length

  return store
}

/**
 * Adds a new token doc pair to the store.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
 *
 * The doc is stored under its `ref` property on the node reached by the last
 * character of the token, and `length` is incremented on every call.
 *
 * @param {String} token The token to store the doc under
 * @param {Object} doc The doc to store against the token
 * @param {Object} root An optional node at which to start looking for the
 * correct place to enter the doc, by default the root of this lunr.TokenStore
 * is used.
 * @memberOf TokenStore
 */
lunr.TokenStore.prototype.add = function (token, doc, root) {
  var root = root || this.root,
      key = token[0],
      rest = token.slice(1)

  // create the child node for this character on first use
  if (!(key in root)) root[key] = {docs: {}}

  if (rest.length === 0) {
    root[key].docs[doc.ref] = doc
    this.length += 1
    return
  } else {
    // descend one level with the remaining characters
    return this.add(rest, doc, root[key])
  }
}

/**
 * Checks whether this key is contained within this lunr.TokenStore.
 *
 * By default this function starts at the root of the current store, however
 * it can start at any node of any token store if required.
* * @param {String} token The token to check for * @param {Object} root An optional node at which to start * @memberOf TokenStore */ lunr.TokenStore.prototype.has = function (token, root) { var root = root || this.root, key = token[0], rest = token.slice(1) if (!(key in root)) return false if (rest.length === 0) { return true } else { return this.has(rest, root[key]) } } /** * Retrieve a node from the token store for a given token. * * By default this function starts at the root of the current store, however * it can start at any node of any token store if required. * * @param {String} token The token to get the node for. * @param {Object} root An optional node at which to start. * @returns {Object} * @see TokenStore.prototype.get * @memberOf TokenStore */ lunr.TokenStore.prototype.getNode = function (token, root) { var root = root || this.root, key = token[0], rest = token.slice(1) if (!(key in root)) return {} if (rest.length === 0) { return root[key] } else { return this.getNode(rest, root[key]) } } /** * Retrieve the documents for a node for the given token. * * By default this function starts at the root of the current store, however * it can start at any node of any token store if required. * * @param {String} token The token to get the documents for. * @param {Object} root An optional node at which to start. * @returns {Object} * @memberOf TokenStore */ lunr.TokenStore.prototype.get = function (token, root) { return this.getNode(token, root).docs || {} } /** * Remove the document identified by ref from the token in the store. * * By default this function starts at the root of the current store, however * it can start at any node of any token store if required. * * @param {String} token The token to get the documents for. * @param {String} ref The ref of the document to remove from this token. * @param {Object} root An optional node at which to start. 
* @returns {Object} * @memberOf TokenStore */ lunr.TokenStore.prototype.remove = function (token, ref, root) { var root = root || this.root, key = token[0], rest = token.slice(1) if (!(key in root)) return if (rest.length === 0) { delete root[key].docs[ref] } else { return this.remove(rest, ref, root[key]) } } /** * Find all the possible suffixes of the passed token using tokens * currently in the store. * * @param {String} token The token to expand. * @returns {Array} * @memberOf TokenStore */ lunr.TokenStore.prototype.expand = function (token, memo) { var root = this.getNode(token), docs = root.docs || {}, memo = memo || [] if (Object.keys(docs).length) memo.push(token) Object.keys(root) .forEach(function (key) { if (key === 'docs') return memo.concat(this.expand(token + key, memo)) }, this) return memo } /** * Returns a representation of the token store ready for serialisation. * * @returns {Object} * @memberOf TokenStore */ lunr.TokenStore.prototype.toJSON = function () { return { root: this.root, length: this.length } }