Chromium Code Reviews| Index: appengine/swarming/elements/third_party/common-strings.js |
| diff --git a/appengine/swarming/elements/third_party/common-strings.js b/appengine/swarming/elements/third_party/common-strings.js |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..072e25c97de23c097cdd47adbe4fb4cabb4a4c30 |
| --- /dev/null |
| +++ b/appengine/swarming/elements/third_party/common-strings.js |
| @@ -0,0 +1,1126 @@ |
| +(function(){ |
|
jcgregorio
2016/10/03 15:15:27
What is the license for these libraries?
Why is t
|
| + |
| +var customUtils = {}; |
| +/** |
| + * Return an array with the numbers from 0 to n-1, in a random order |
| + */ |
| +function getRandomArray (n) { |
| + var res, next; |
| + |
| + if (n === 0) { return []; } |
| + if (n === 1) { return [0]; } |
| + |
| + res = getRandomArray(n - 1); |
| + next = Math.floor(Math.random() * n); |
| + res.splice(next, 0, n - 1); // Add n-1 at a random position in the array |
| + |
| + return res; |
| +}; |
| +customUtils.getRandomArray = getRandomArray; |
| + |
| + |
| +/* |
| + * Default compareKeys function will work for numbers, strings and dates |
| + */ |
| +function defaultCompareKeysFunction (a, b) { |
| + if (a < b) { return -1; } |
| + if (a > b) { return 1; } |
| + if (a === b) { return 0; } |
| + |
| + throw { message: "Couldn't compare elements", a: a, b: b }; |
| +} |
| +customUtils.defaultCompareKeysFunction = defaultCompareKeysFunction; |
| + |
| + |
| +/** |
| + * Check whether two values are equal (used in non-unique deletion) |
| + */ |
| +function defaultCheckValueEquality (a, b) { |
| + return a === b; |
| +} |
| +customUtils.defaultCheckValueEquality = defaultCheckValueEquality; |
| + |
| +/** |
| + * Simple binary search tree |
| + */ |
| + |
| + |
| +/** |
| + * Constructor |
| + * @param {Object} options Optional |
| + * @param {Boolean} options.unique Whether to enforce a 'unique' constraint on the key or not |
| + * @param {Key} options.key Initialize this BST's key with key |
| + * @param {Value} options.value Initialize this BST's data with [value] |
| + * @param {Function} options.compareKeys Initialize this BST's compareKeys |
| + */ |
| +function BinarySearchTree (options) { |
| + options = options || {}; |
| + |
| + this.left = null; |
| + this.right = null; |
| + this.parent = options.parent !== undefined ? options.parent : null; |
| + if (options.hasOwnProperty('key')) { this.key = options.key; } |
| + this.data = options.hasOwnProperty('value') ? [options.value] : []; |
| + this.unique = options.unique || false; |
| + |
| + this.compareKeys = options.compareKeys || customUtils.defaultCompareKeysFunction; |
| + this.checkValueEquality = options.checkValueEquality || customUtils.defaultCheckValueEquality; |
| +} |
| + |
| + |
| +// ================================ |
| +// Methods used to test the tree |
| +// ================================ |
| + |
| + |
| +/** |
| + * Get the descendant with max key |
| + */ |
| +BinarySearchTree.prototype.getMaxKeyDescendant = function () { |
| + if (this.right) { |
| + return this.right.getMaxKeyDescendant(); |
| + } else { |
| + return this; |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Get the maximum key |
| + */ |
| +BinarySearchTree.prototype.getMaxKey = function () { |
| + return this.getMaxKeyDescendant().key; |
| +}; |
| + |
| + |
| +/** |
| + * Get the descendant with min key |
| + */ |
| +BinarySearchTree.prototype.getMinKeyDescendant = function () { |
| + if (this.left) { |
| + return this.left.getMinKeyDescendant() |
| + } else { |
| + return this; |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Get the minimum key |
| + */ |
| +BinarySearchTree.prototype.getMinKey = function () { |
| + return this.getMinKeyDescendant().key; |
| +}; |
| + |
| + |
| +/** |
| + * Check that all nodes (incl. leaves) fullfil condition given by fn |
| + * test is a function passed every (key, data) and which throws if the condition is not met |
| + */ |
| +BinarySearchTree.prototype.checkAllNodesFullfillCondition = function (test) { |
| + if (!this.hasOwnProperty('key')) { return; } |
| + |
| + test(this.key, this.data); |
| + if (this.left) { this.left.checkAllNodesFullfillCondition(test); } |
| + if (this.right) { this.right.checkAllNodesFullfillCondition(test); } |
| +}; |
| + |
| + |
| +/** |
| + * Check that the core BST properties on node ordering are verified |
| + * Throw if they aren't |
| + */ |
| +BinarySearchTree.prototype.checkNodeOrdering = function () { |
| + var self = this; |
| + |
| + if (!this.hasOwnProperty('key')) { return; } |
| + |
| + if (this.left) { |
| + this.left.checkAllNodesFullfillCondition(function (k) { |
| + if (self.compareKeys(k, self.key) >= 0) { |
| + throw 'Tree with root ' + self.key + ' is not a binary search tree'; |
| + } |
| + }); |
| + this.left.checkNodeOrdering(); |
| + } |
| + |
| + if (this.right) { |
| + this.right.checkAllNodesFullfillCondition(function (k) { |
| + if (self.compareKeys(k, self.key) <= 0) { |
| + throw 'Tree with root ' + self.key + ' is not a binary search tree'; |
| + } |
| + }); |
| + this.right.checkNodeOrdering(); |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Check that all pointers are coherent in this tree |
| + */ |
| +BinarySearchTree.prototype.checkInternalPointers = function () { |
| + if (this.left) { |
| + if (this.left.parent !== this) { throw 'Parent pointer broken for key ' + this.key; } |
| + this.left.checkInternalPointers(); |
| + } |
| + |
| + if (this.right) { |
| + if (this.right.parent !== this) { throw 'Parent pointer broken for key ' + this.key; } |
| + this.right.checkInternalPointers(); |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Check that a tree is a BST as defined here (node ordering and pointer references) |
| + */ |
| +BinarySearchTree.prototype.checkIsBST = function () { |
| + this.checkNodeOrdering(); |
| + this.checkInternalPointers(); |
| + if (this.parent) { throw "The root shouldn't have a parent"; } |
| +}; |
| + |
| + |
| +/** |
| + * Get number of keys inserted |
| + */ |
| +BinarySearchTree.prototype.getNumberOfKeys = function () { |
| + var res; |
| + |
| + if (!this.hasOwnProperty('key')) { return 0; } |
| + |
| + res = 1; |
| + if (this.left) { res += this.left.getNumberOfKeys(); } |
| + if (this.right) { res += this.right.getNumberOfKeys(); } |
| + |
| + return res; |
| +}; |
| + |
| + |
| + |
| +// ============================================ |
| +// Methods used to actually work on the tree |
| +// ============================================ |
| + |
| +/** |
| + * Create a BST similar (i.e. same options except for key and value) to the current one |
| + * Use the same constructor (i.e. BinarySearchTree, AVLTree etc) |
| + * @param {Object} options see constructor |
| + */ |
| +BinarySearchTree.prototype.createSimilar = function (options) { |
| + options = options || {}; |
| + options.unique = this.unique; |
| + options.compareKeys = this.compareKeys; |
| + options.checkValueEquality = this.checkValueEquality; |
| + |
| + return new this.constructor(options); |
| +}; |
| + |
| + |
| +/** |
| + * Create the left child of this BST and return it |
| + */ |
| +BinarySearchTree.prototype.createLeftChild = function (options) { |
| + var leftChild = this.createSimilar(options); |
| + leftChild.parent = this; |
| + this.left = leftChild; |
| + |
| + return leftChild; |
| +}; |
| + |
| + |
| +/** |
| + * Create the right child of this BST and return it |
| + */ |
| +BinarySearchTree.prototype.createRightChild = function (options) { |
| + var rightChild = this.createSimilar(options); |
| + rightChild.parent = this; |
| + this.right = rightChild; |
| + |
| + return rightChild; |
| +}; |
| + |
| + |
| +/** |
| + * Insert a new element |
| + */ |
| +BinarySearchTree.prototype.insert = function (key, value) { |
| + // Empty tree, insert as root |
| + if (!this.hasOwnProperty('key')) { |
| + this.key = key; |
| + this.data.push(value); |
| + return; |
| + } |
| + |
| + // Same key as root |
| + if (this.compareKeys(this.key, key) === 0) { |
| + if (this.unique) { |
| + throw { message: "Can't insert key " + key + ", it violates the unique constraint" |
| + , key: key |
| + , errorType: 'uniqueViolated' |
| + }; |
| + } else { |
| + this.data.push(value); |
| + } |
| + return; |
| + } |
| + |
| + if (this.compareKeys(key, this.key) < 0) { |
| + // Insert in left subtree |
| + if (this.left) { |
| + this.left.insert(key, value); |
| + } else { |
| + this.createLeftChild({ key: key, value: value }); |
| + } |
| + } else { |
| + // Insert in right subtree |
| + if (this.right) { |
| + this.right.insert(key, value); |
| + } else { |
| + this.createRightChild({ key: key, value: value }); |
| + } |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Search for all data corresponding to a key |
| + */ |
| +BinarySearchTree.prototype.search = function (key) { |
| + if (!this.hasOwnProperty('key')) { return []; } |
| + |
| + if (this.compareKeys(this.key, key) === 0) { return this.data; } |
| + |
| + if (this.compareKeys(key, this.key) < 0) { |
| + if (this.left) { |
| + return this.left.search(key); |
| + } else { |
| + return []; |
| + } |
| + } else { |
| + if (this.right) { |
| + return this.right.search(key); |
| + } else { |
| + return []; |
| + } |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Return a function that tells whether a given key matches a lower bound |
| + */ |
| +BinarySearchTree.prototype.getLowerBoundMatcher = function (query) { |
| + var self = this; |
| + |
| + // No lower bound |
| + if (!query.hasOwnProperty('$gt') && !query.hasOwnProperty('$gte')) { |
| + return function () { return true; }; |
| + } |
| + |
| + if (query.hasOwnProperty('$gt') && query.hasOwnProperty('$gte')) { |
| + if (self.compareKeys(query.$gte, query.$gt) === 0) { |
| + return function (key) { return self.compareKeys(key, query.$gt) > 0; }; |
| + } |
| + |
| + if (self.compareKeys(query.$gte, query.$gt) > 0) { |
| + return function (key) { return self.compareKeys(key, query.$gte) >= 0; }; |
| + } else { |
| + return function (key) { return self.compareKeys(key, query.$gt) > 0; }; |
| + } |
| + } |
| + |
| + if (query.hasOwnProperty('$gt')) { |
| + return function (key) { return self.compareKeys(key, query.$gt) > 0; }; |
| + } else { |
| + return function (key) { return self.compareKeys(key, query.$gte) >= 0; }; |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Return a function that tells whether a given key matches an upper bound |
| + */ |
| +BinarySearchTree.prototype.getUpperBoundMatcher = function (query) { |
| + var self = this; |
| + |
| + // No lower bound |
| + if (!query.hasOwnProperty('$lt') && !query.hasOwnProperty('$lte')) { |
| + return function () { return true; }; |
| + } |
| + |
| + if (query.hasOwnProperty('$lt') && query.hasOwnProperty('$lte')) { |
| + if (self.compareKeys(query.$lte, query.$lt) === 0) { |
| + return function (key) { return self.compareKeys(key, query.$lt) < 0; }; |
| + } |
| + |
| + if (self.compareKeys(query.$lte, query.$lt) < 0) { |
| + return function (key) { return self.compareKeys(key, query.$lte) <= 0; }; |
| + } else { |
| + return function (key) { return self.compareKeys(key, query.$lt) < 0; }; |
| + } |
| + } |
| + |
| + if (query.hasOwnProperty('$lt')) { |
| + return function (key) { return self.compareKeys(key, query.$lt) < 0; }; |
| + } else { |
| + return function (key) { return self.compareKeys(key, query.$lte) <= 0; }; |
| + } |
| +}; |
| + |
| + |
| +// Append all elements in toAppend to array |
| +function append (array, toAppend) { |
| + var i; |
| + |
| + for (i = 0; i < toAppend.length; i += 1) { |
| + array.push(toAppend[i]); |
| + } |
| +} |
| + |
| + |
| +/** |
| + * Get all data for a key between bounds |
| + * Return it in key order |
| + * @param {Object} query Mongo-style query where keys are $lt, $lte, $gt or $gte (other keys are not considered) |
| + * @param {Functions} lbm/ubm matching functions calculated at the first recursive step |
| + */ |
| +BinarySearchTree.prototype.betweenBounds = function (query, lbm, ubm) { |
| + var res = []; |
| + |
| + if (!this.hasOwnProperty('key')) { return []; } // Empty tree |
| + |
| + lbm = lbm || this.getLowerBoundMatcher(query); |
| + ubm = ubm || this.getUpperBoundMatcher(query); |
| + |
| + if (lbm(this.key) && this.left) { append(res, this.left.betweenBounds(query, lbm, ubm)); } |
| + if (lbm(this.key) && ubm(this.key)) { append(res, this.data); } |
| + if (ubm(this.key) && this.right) { append(res, this.right.betweenBounds(query, lbm, ubm)); } |
| + |
| + return res; |
| +}; |
| + |
| + |
| +/** |
| + * Delete the current node if it is a leaf |
| + * Return true if it was deleted |
| + */ |
| +BinarySearchTree.prototype.deleteIfLeaf = function () { |
| + if (this.left || this.right) { return false; } |
| + |
| + // The leaf is itself a root |
| + if (!this.parent) { |
| + delete this.key; |
| + this.data = []; |
| + return true; |
| + } |
| + |
| + if (this.parent.left === this) { |
| + this.parent.left = null; |
| + } else { |
| + this.parent.right = null; |
| + } |
| + |
| + return true; |
| +}; |
| + |
| + |
| +/** |
| + * Delete the current node if it has only one child |
| + * Return true if it was deleted |
| + */ |
| +BinarySearchTree.prototype.deleteIfOnlyOneChild = function () { |
| + var child; |
| + |
| + if (this.left && !this.right) { child = this.left; } |
| + if (!this.left && this.right) { child = this.right; } |
| + if (!child) { return false; } |
| + |
| + // Root |
| + if (!this.parent) { |
| + this.key = child.key; |
| + this.data = child.data; |
| + |
| + this.left = null; |
| + if (child.left) { |
| + this.left = child.left; |
| + child.left.parent = this; |
| + } |
| + |
| + this.right = null; |
| + if (child.right) { |
| + this.right = child.right; |
| + child.right.parent = this; |
| + } |
| + |
| + return true; |
| + } |
| + |
| + if (this.parent.left === this) { |
| + this.parent.left = child; |
| + child.parent = this.parent; |
| + } else { |
| + this.parent.right = child; |
| + child.parent = this.parent; |
| + } |
| + |
| + return true; |
| +}; |
| + |
| + |
| +/** |
| + * Delete a key or just a value |
| + * @param {Key} key |
| + * @param {Value} value Optional. If not set, the whole key is deleted. If set, only this value is deleted |
| + */ |
| +BinarySearchTree.prototype.delete = function (key, value) { |
| + var newData = [], replaceWith |
| + , self = this |
| + ; |
| + |
| + if (!this.hasOwnProperty('key')) { return; } |
| + |
| + if (this.compareKeys(key, this.key) < 0) { |
| + if (this.left) { this.left.delete(key, value); } |
| + return; |
| + } |
| + |
| + if (this.compareKeys(key, this.key) > 0) { |
| + if (this.right) { this.right.delete(key, value); } |
| + return; |
| + } |
| + |
| + if (!this.compareKeys(key, this.key) === 0) { return; } |
| + |
| + // Delete only a value |
| + if (this.data.length > 1 && value !== undefined) { |
| + this.data.forEach(function (d) { |
| + if (!self.checkValueEquality(d, value)) { newData.push(d); } |
| + }); |
| + self.data = newData; |
| + return; |
| + } |
| + |
| + // Delete the whole node |
| + if (this.deleteIfLeaf()) { |
| + return; |
| + } |
| + if (this.deleteIfOnlyOneChild()) { |
| + return; |
| + } |
| + |
| + // We are in the case where the node to delete has two children |
| + if (Math.random() >= 0.5) { // Randomize replacement to avoid unbalancing the tree too much |
| + // Use the in-order predecessor |
| + replaceWith = this.left.getMaxKeyDescendant(); |
| + |
| + this.key = replaceWith.key; |
| + this.data = replaceWith.data; |
| + |
| + if (this === replaceWith.parent) { // Special case |
| + this.left = replaceWith.left; |
| + if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; } |
| + } else { |
| + replaceWith.parent.right = replaceWith.left; |
| + if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; } |
| + } |
| + } else { |
| + // Use the in-order successor |
| + replaceWith = this.right.getMinKeyDescendant(); |
| + |
| + this.key = replaceWith.key; |
| + this.data = replaceWith.data; |
| + |
| + if (this === replaceWith.parent) { // Special case |
| + this.right = replaceWith.right; |
| + if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; } |
| + } else { |
| + replaceWith.parent.left = replaceWith.right; |
| + if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; } |
| + } |
| + } |
| +}; |
| + |
| + |
| +/** |
| + * Execute a function on every node of the tree, in key order |
| + * @param {Function} fn Signature: node. Most useful will probably be node.key and node.data |
| + */ |
| +BinarySearchTree.prototype.executeOnEveryNode = function (fn) { |
| + if (this.left) { this.left.executeOnEveryNode(fn); } |
| + fn(this); |
| + if (this.right) { this.right.executeOnEveryNode(fn); } |
| +}; |
| + |
| + |
| +/** |
| + * Pretty print a tree |
| + * @param {Boolean} printData To print the nodes' data along with the key |
| + */ |
| +BinarySearchTree.prototype.prettyPrint = function (printData, spacing) { |
| + spacing = spacing || ""; |
| + |
| + console.log(spacing + "* " + this.key); |
| + if (printData) { console.log(spacing + "* " + this.data); } |
| + |
| + if (!this.left && !this.right) { return; } |
| + |
| + if (this.left) { |
| + this.left.prettyPrint(printData, spacing + " "); |
| + } else { |
| + console.log(spacing + " *"); |
| + } |
| + if (this.right) { |
| + this.right.prettyPrint(printData, spacing + " "); |
| + } else { |
| + console.log(spacing + " *"); |
| + } |
| +}; |
| + |
| +/** |
| +* Created by Hanwen on 01.07.2014. |
| +*/ |
| + |
| +/** |
| + * Constructor |
| + * @param {Object} [options] config map |
| + * @param {Number} options.minLength the shortest length of the fragment. |
| + * @param {Number} options.minOccurrence the minimum occurrence of the fragment. |
| + * @param {Boolean} options.debug whether show the console message and set the minLength and minOccurrence. |
| + * @constructor |
| + */ |
| +function SuffixTrie(options) { |
| + this.options = options || {}; |
| + |
| + this.structure = {}; |
| + this.debug = this.options.debug === undefined ? false : options.debug; |
| + |
| + |
| + this.minLENGTH = this.options.minLength === undefined ? 3 : options.minLength; |
| + this.minOccurrence = this.options.minOccurrence === undefined ? 2 : options.minOccurrence; |
| + this.isByLength = this.options.byLength === undefined ? false : options.byLength; |
| + |
| + //this.options.limit is a number |
| + this.save = []; |
| + this.array = null; |
| + this.labelArray = null; |
| + this.fragmentsArray = null; |
| + this.fragmentTrie = {}; |
| + this.rebuildArray; |
| + //default test environment |
| + |
| +} |
| + |
| +//help function to clear the trie |
| +SuffixTrie.prototype.refresh = function(){ |
| + this.structure = {}; |
| + this.save = []; |
| + this.array = null; |
| + this.labelArray = null; |
| + this.fragmentsArray = null; |
| + this.fragmentTrie = {}; |
| + this.rebuildArray; |
| +}; |
| + |
| + |
| +//help function for quick weight |
| +SuffixTrie.prototype.weigh = function(){ |
| + if(this.isByLength){ |
| + return this.weighByMax() |
| + }else{ |
| + return this.weighByAverage() |
| + } |
| +} |
| + |
| +// ====================================================================================================================== |
| +// ==================================================== Public Functions ================================================ |
| +// ====================================================================================================================== |
| +/** |
| + * add fragment string to the trie |
| + * @param array {Array} adding word |
| + */ |
| +SuffixTrie.prototype.build = function (array) { |
| + var debug = this.debug; |
| + this.buildLabelTrie(array); |
| + this.optimize(this.structure); |
| + if(debug) console.log('after optimization our label trie of array length ' + this.array.length + ' is ', this.structure); |
| + |
| + this.listLabel(); |
| + if(debug) console.log('get the compressed label array (without duplicate fragments ) of array length ' + this.array.length + ' is ', this.labelArray); |
| + if(debug) console.log('and fragments array of length ' + this.fragmentsArray.length + ' is ', this.fragmentsArray); |
| + |
| + this.clearRedundantFragment(); |
| + if(debug) console.log('get the cleared label array (without duplicate fragments ) of array length ' + this.array.length + ' is ', this.labelArray); |
| + if(debug) console.log('and cleared fragments array of length ' + this.fragmentsArray.length + ' is ', this.fragmentsArray); |
| + |
| + this.rebuild(); |
| + if(debug) console.log('rebuild ended' , this.rebuildArray); |
| +}; |
| + |
| +SuffixTrie.prototype.buildLabelTrie = function (array) { |
| + this.array = array; |
| + var root = this.structure; |
| + var LENGTH = this.minLENGTH; |
| + |
| + array.forEach(function (word, index) { |
| + var first = true; |
| + //used to store the loop element, for connect the suffixes, e.g. 'foob' next is 'oob', by this connection, we get all the substrings. |
| + var last = {}; |
| + |
| + //loop every suffix in one word, e.g. 'oss' in 'boss' |
| + for (var i = 0, l = word.length; i <= l - LENGTH; i++) { |
| + var cur = root; |
| + var suffix = word.substring(i); |
| + var letters = suffix.split(""); |
| + |
| + //loop every letter in the suffix, e.g. 'o' in 'oss' |
| + for (var j = 0; j < letters.length; j++) { |
| + var letter = letters[j], pos = cur[ letter ]; |
| + |
| + if (j === letters.length - 1) { |
| + if (pos == null) { |
| + //create new node and add the information. |
| + cur[letter] = {source: [index], listed: false}; |
| + } else if (pos.hasOwnProperty('source')) { |
| + //just add occurrence |
| + pos["source"].push(index); |
| + } else { |
| + //node already existed, add information. |
| + cur[letter]['source'] = [index]; |
| + cur[letter]['listed'] = false; |
| + } |
| + cur = cur[letter]; |
| + |
| + if (!first) { |
| + last['next'] = suffix; |
| + } else { |
| + first = false; |
| + } |
| + |
| + last = cur; |
| + } else if (pos == null) { |
| + //create node |
| + cur = cur[letter] = {}; |
| + } else { |
| + //no creation, loop to next node |
| + cur = cur[ letter ]; |
| + } |
| + } |
| + } |
| + }); |
| + |
| +}; |
| + |
| +/** |
| + * add the origin for the split node and trunk node && integrate prefix in each branch |
| + * @param {Object} root the start node |
| + * @param {Number} [rootLevel] the start level of the algorithm, should be -1 |
| + * @returns {Array} array with the origin of the word |
| + * |
| + */ |
| +SuffixTrie.prototype.optimize = function (root, rootLevel) { |
| + var occurrence = this.minOccurrence; |
| + //self origin indexes |
| + var self_save = []; |
| + rootLevel = rootLevel === undefined ? -1 : rootLevel; |
| + rootLevel++; |
| + //whether the word is long enough. ignored the short part. |
| + var is_allowed = rootLevel >= this.minLENGTH; |
| + |
| + for (var child in root) { |
| + |
| + if (root.hasOwnProperty(child)) { //&& child !== 'next' && child !== 'level' |
| + //loop to new child and combine the index information |
| + if (child.length === 1) { |
| + |
| + // returned indexes from children (iterate from the leaf) |
| + var children_save = this.optimize(root[child], rootLevel); |
| + if (is_allowed) { |
| + self_save = self_save.concat(children_save); |
| + } |
| + } else if (child === 'source' && is_allowed) { |
| + self_save = self_save.concat(root['source']); |
| + } |
| + } |
| + } |
| + |
| + // this part is to test if the fragment fulfil the occurrence requirement, delete if not, which reduce time significantly. |
| + self_save = uniqueArray(self_save); |
| + var isEnoughOccurred = self_save.length >= occurrence; |
| + is_allowed = is_allowed && isEnoughOccurred; |
| + |
| + if (is_allowed) { |
| + //leaf will at least has property of 'listed' and 'next' |
| + var is_SoleNode = Object.keys(root).length === 1; |
| + if (!is_SoleNode) { |
| + //this is a split node or a flaged node, add information include source here |
| + root['source'] = self_save; //update it |
| + root['level'] = rootLevel; |
| + root['listed'] = false; //indicate this node should be check |
| + root['weight'] = self_save.length * rootLevel; |
| + self_save = []; //clear the array: this is important if we don't want the node with former indexes! |
| + } else { |
| + //this is just a sole node with one child, do nothing |
| + } |
| + } |
| + else { |
| + //it is lower than the request level , or it is a leaf node, so not calculate here |
| + delete root['source']; |
| + } |
| + |
| + return self_save; |
| +}; |
| + |
| +/** |
| + * list the repeat part with certain order && integrate suffix in 'next' branch |
| + * @param {Number} [start] start point in the array [0 - array.length] |
| + */ |
| +SuffixTrie.prototype.listLabel = function (start) { |
| + var array = this.array; |
| + var root = this.structure; |
| + var label_array = []; |
| + var fragments_array = []; |
| + var length = this.minLENGTH; |
| + var occurrence = this.minOccurrence; |
| + start = start === undefined ? 1 : start; |
| + |
| + //loop from the certain index, lead to different rebuild array. |
| + for (var index = start - 1, i = 0; i < array.length; index++, index = index % (array.length), i++) { |
| + var word = array[index]; |
| + |
| + var fragments = {}; |
| + //skip the short word, just push the empty object. |
| + if (word.length < length) { |
| + label_array.push(fragments); |
| + continue; |
| + } |
| + |
| + findFragments(word, fragments, root, length, occurrence, false); |
| + |
| + //accumulate the fragment to another array. |
| + for (var fragment in fragments) { |
| + if (fragments.hasOwnProperty(fragment)) { |
| + //get all the origin label index and push them into fragment array with default order |
| + var fragmentsArrayIndex = fragments_array.push(fragments[fragment]) - 1; |
| + fragments_array[fragmentsArrayIndex]['name'] = fragment; |
| + } |
| + } |
| + |
| + //build another array to map each label and all of its fragments. |
| + label_array.push(fragments); |
| + } |
| + this.labelArray = label_array; |
| + this.fragmentsArray = fragments_array; |
| + return label_array; |
| +}; |
| + |
| +/** |
| + * rebuild the label array, make sure every label has all of its own fragments |
| + */ |
| +SuffixTrie.prototype.rebuild = function () { |
| + var rebuildArray = JSON.parse(JSON.stringify(this.labelArray));//deep copy array |
| + rebuildArray.forEach(function (object, index) { |
| + for (var fragment in object) { |
| + if (object.hasOwnProperty(fragment)) { |
| + object[fragment]['source'].forEach(function (labelIndex) { |
| + if (labelIndex > index) { |
| + rebuildArray[labelIndex][fragment] = object[fragment]; |
| + } |
| + }); |
| + } |
| + } |
| + }); |
| + this.rebuildArray = rebuildArray; |
| +}; |
| + |
| +/** |
| + * weight the fragments and order them by the product of their occurrence and length, |
| + * should use this function after the build and rebuild. |
| + */ |
| +SuffixTrie.prototype.weighByAverage = function () { |
| + var debug = this.debug; |
| + var fragmentsArray = this.fragmentsArray; |
| + var fragments_Num = this.fragmentsArray.length; |
| + fragmentsArray.sort(function (f1, f2) { |
| + return f2['weight'] - f1['weight']; |
| + }); |
| + if (debug) console.log('weigh by average: result of length ' + fragments_Num + ' is', fragmentsArray); |
| + |
| + if(typeof this.options.limit == "number") |
| + fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num; |
| + |
| + return fragmentsArray.slice(0, fragments_Num); |
| +}; |
| + |
| +/** |
| + * weight the fragments and order them by max label length. |
| + */ |
| +SuffixTrie.prototype.weighByMax = function () { |
| + var debug = this.debug; |
| + |
| + buildFragmentTrie(this.fragmentTrie, this.fragmentsArray); |
| + |
| + var rebuildArray = JSON.parse(JSON.stringify(this.rebuildArray)); |
| +// var rebuildArray = this.rebuildArray; |
| + var fragmentsArray = []; |
| + var fragmentsTrie = this.fragmentTrie; |
| + var fragments_Num = this.fragmentsArray.length; |
| + var label_Lengths = this.array.map(function (s) { |
| + return s.length; |
| + }); |
| + |
| + var label_Tree = new BinarySearchTree(); |
| + label_Lengths.forEach(function (length, index) { |
| + label_Tree.insert(length, index); |
| + }); |
| + |
| + if (typeof this.options.limit == "number") |
| + fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num; |
| + |
| + while (fragmentsArray.length < fragments_Num) { |
| + var maxKey = label_Tree.getMaxKey(); |
| + |
| + var labels_In_Key = label_Tree.search(maxKey); |
| + |
| + if (labels_In_Key.length === 0) { |
| + label_Tree.delete(maxKey); |
| + continue; |
| + } |
| + //choose a random labels index with the biggest length |
| + var longest_Label = labels_In_Key[0]; |
| + |
| + //get all the fragments in this label |
| + //in case the there is no fragments under it. |
| + var fragments = Object.keys(rebuildArray[longest_Label]); |
| + if (fragments.length > 0) { |
| + //then find the longest fragment |
| + fragments.sort(function (f1, f2) { |
| + return f2.length - f1.length; |
| + }); |
| + removeTrie(fragments[0], fragmentsTrie, fragmentsArray, label_Lengths, rebuildArray, label_Tree); |
| + } |
| + //in case there is no this label in fragments array -- which could be possible |
| + label_Tree.delete(maxKey, longest_Label); |
| + } |
| + |
| + this.fragmentsArray = fragmentsArray; |
| + if (debug) console.log('weigh by max: result of length ' + fragmentsArray.length + ' is', fragmentsArray); |
| + |
| + return fragmentsArray.slice(0, fragments_Num); |
| +}; |
| + |
| +/** |
| + * delete the redundant fragment, always keep the longer one. |
| + */ |
| +SuffixTrie.prototype.clearRedundantFragment = function () { |
| + var fragments = this.fragmentsArray; |
| + var occurrence = this.minOccurrence; |
| + var newFragmentArray = []; |
| + var labelArray = this.labelArray; |
| + //sort the fragments array from short to long. |
| + fragments.sort(function (a, b) { |
| + return a.name.length - b.name.length; |
| + }); |
| + |
| + fragments.forEach(function (fragment, index) { |
| + //backup fragment's occurrence. |
| + var backupsource = fragment.source.slice(); |
| + //check if the longer contain duplicated occurrence, filter such occurrence |
| + for (var i = index + 1; i < fragments.length; i++) { |
| + var longerFragment = fragments[i]; |
| + if (longerFragment.name.indexOf(fragment.name) !== -1) { |
| + fragment.source = fragment.source.filter(function (i) { |
| + return this.indexOf(i) === -1; |
| + }, longerFragment.source); |
| + } |
| + } |
| + |
| + //build the new fragment array with no duplication |
| + if (fragment.source.length >= occurrence) { |
| + newFragmentArray.push(fragment); |
| + } |
| + else { |
| + if (backupsource.length !== fragment.source.length) { |
| + deleteFragmentInLabelArray(backupsource, fragment.name, labelArray); |
| + } |
| + } |
| + }); |
| + this.fragmentsArray = newFragmentArray; |
| +}; |
| + |
| +// ====================================================================================================================== |
| +// ==================================================== Helper Functions ================================================ |
| +// ====================================================================================================================== |
| + |
| +/** |
| + * Most Important Helper function |
| + * help function to find the all the fragment of one word from the root of the tree, and finally link to another suffix |
| + * @param word searching objects |
| + * @param fragments an object to store the sources of the common fragments in the path to find the word |
| + * @param root start point |
| + * @param length minimum length requirement |
| + * @param occurrence minimum occurrence requirement |
| + * @param [iterate] {Boolean} whether it is the complete word or suffix |
| + */ |
| +function findFragments(word, fragments, root, length, occurrence, iterate) { |
| + |
| + var cur = root; |
| + var letters = word.split(""); |
| + //loop every letter in the suffix, e.g. 'o' in 'oss' |
| + for (var j = 0; j < letters.length; j++) { |
| + var letter = letters[j], pos = cur[ letter ]; |
| + if (j + 1 >= length) { |
| + var fragment = word.substring(0, j + 1); |
| + |
| + if (pos.hasOwnProperty('listed')) { |
| + if (!pos['listed']) { |
| + //if it is a node fulfil our requirement |
| + if (pos.hasOwnProperty('source')) { |
| + fragments[fragment] = {}; |
| + fragments[fragment]['source'] = pos.source; |
| + fragments[fragment]['weight'] = pos['weight']; |
| + |
| + //debug whether we include all the situation. |
| + if (pos['weight'] == null) { |
| + console.warn('escape case! fragment does not has weight property, node is ', fragments[fragment]); |
| + } |
| + if (fragments.hasOwnProperty(fragment)) { |
| + //if this is not the full word (but a suffix) we check the duplication |
| + if (iterate) { |
| + for (var property in fragments) { |
| + if (property !== fragment && fragments.hasOwnProperty(property) && fragments.hasOwnProperty(fragment)) { |
| + if (property.indexOf(fragment) !== -1) { |
| + fragments[fragment]['source'] = fragments[fragment]['source'].filter(function (i) { |
| + return this.indexOf(i) < 0 |
| + }, fragments[property].source); |
| + } |
| + } |
| + } |
| + |
| + if (fragments[fragment]['source'].length < occurrence) { |
| + delete fragments[fragment]; |
| + } |
| + } |
| + } |
| + pos['listed'] = true; |
| + } |
| + //iterate to its suffix from root |
| + if (pos.hasOwnProperty('next')) { |
| + findFragments(pos['next'], fragments, root, length, occurrence, true); |
| + } |
| + } |
| + } |
| + } |
| + cur = pos; |
| + } |
| +} |
| + |
| +/** |
| + * A helper function to delete the reference in Label Array |
| + * @param {Array} source fragment.source |
| + * @param {String} name fragment.name |
| + * @param labelArray LabelArray |
| + */ |
| +function deleteFragmentInLabelArray(source, name, labelArray) { |
| + var label = labelArray[source[0]]; |
| + if (label.hasOwnProperty(name)) { |
| + delete label[name]; |
| + } |
| +} |
| + |
| +/** |
| + * Helper function for build a dictionary trie for searching the fragment. |
| + * @param trie the fragment trie will be build in this variable. |
| + * @param array the fragment array used for building trie. |
| + */ |
| +function buildFragmentTrie(trie, array) { |
| + for (var i = 0; i < array.length; i++) { |
| + |
| + var word = array[i]['name'], letters = word.split(""), cur = trie; |
| + |
| + // Loop through the letters |
| + for (var j = 0; j < letters.length; j++) { |
| + var letter = letters[j]; |
| + |
| + if (!cur.hasOwnProperty(letter)) { |
| + cur[ letter ] = {}; |
| + } |
| + cur = cur[letter]; |
| + if (j === letters.length - 1) { |
| + cur['source'] = array[i]['source']; |
| + cur['name'] = array[i]['name']; |
| + cur['weight'] = array[i]['weight']; |
| + cur['list'] = false; |
| + } |
| + |
| + } |
| + } |
| +} |
| + |
| +/** |
| + * Helper function to remove the fragment from the trie and list it in the result array (fragmentsArray). |
| + * @param word the word to be searched for. |
| + * @param cur the node where the search start. |
| + * @param array the array to store the result fragment |
| + * @param labelLengthArray the array map to the length of the label |
| + * @param rebuildArray this.rebuildArray |
| + * @param label_trie the binary search trie where store all the labels with their length. |
| + * @returns {Number|Boolean} if find return the length of fragment, else return false. |
| + */ |
| +function removeTrie(word, cur, array, labelLengthArray, rebuildArray, label_trie) { |
| + for (var node in cur) { |
| + |
| + if (cur.hasOwnProperty(node) && node.length === 1) { |
| + |
| + if (word.indexOf(node) === 0) { |
| + //if this is the leaf of the trie |
| + if (word.length === 1) { |
| + if (cur[node].hasOwnProperty('list')) { |
| + var fragment = {}; |
| + fragment['source'] = cur[node]['source']; |
| + var fragmentName = cur[node]['name']; |
| + fragment['name'] = fragmentName; |
| + fragment['weight'] = cur[node]['weight']; |
| + delete cur[node]['list']; |
| + //store it into our result |
| + array.push(fragment); |
| + |
| + //for other node who has same fragment |
| + cur[node]['source'].forEach(function (indexLabel) { |
| + //delete the fragment reference in the label |
| + delete rebuildArray[indexLabel][fragmentName]; |
| + //update the label length in the trie |
| + var labelLength = labelLengthArray[indexLabel]; |
| + label_trie.delete(labelLength, indexLabel); |
| + labelLength -= fragmentName.length; |
| + label_trie.insert(labelLength, indexLabel); |
| + }); |
| + } |
| + } else { |
| + removeTrie(word.slice(1), cur[node], array, labelLengthArray, rebuildArray, label_trie); |
| + } |
| + } |
| + |
| + } |
| + } |
| +} |
| + |
| +/** |
| + * Helper function to remove the repeat elements in an array |
| + * @param {Array} array |
| + * @returns {Array} the result array without duplicate elments |
| + */ |
| +function uniqueArray(array) { |
| + var a = array.concat(); |
| + for (var i = 0; i < a.length; ++i) { |
| + for (var j = i + 1; j < a.length; ++j) { |
| + if (a[i] === a[j]) |
| + a.splice(j--, 1); |
| + } |
| + } |
| + return a; |
| +} |
| + |
| +window.Substrings = SuffixTrie; |
| + |
| +})(); |