Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2489)

Unified Diff: appengine/swarming/elements/third_party/common-strings.js

Issue 2381853003: Add bot-page summary with utilization stats (Closed) Base URL: git@github.com:luci/luci-py@page-everywhere
Patch Set: rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/swarming/elements/third_party/README.md ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/swarming/elements/third_party/common-strings.js
diff --git a/appengine/swarming/elements/third_party/common-strings.js b/appengine/swarming/elements/third_party/common-strings.js
new file mode 100644
index 0000000000000000000000000000000000000000..072e25c97de23c097cdd47adbe4fb4cabb4a4c30
--- /dev/null
+++ b/appengine/swarming/elements/third_party/common-strings.js
@@ -0,0 +1,1126 @@
+(function(){
jcgregorio 2016/10/03 15:15:27 What is the license for these libraries? Why is t
+
+var customUtils = {};
+/**
+ * Return an array with the numbers from 0 to n-1, in a random order
+ */
+function getRandomArray (n) {
+ var res, next;
+
+ if (n === 0) { return []; }
+ if (n === 1) { return [0]; }
+
+ res = getRandomArray(n - 1);
+ next = Math.floor(Math.random() * n);
+ res.splice(next, 0, n - 1); // Add n-1 at a random position in the array
+
+ return res;
+};
+customUtils.getRandomArray = getRandomArray;
+
+
+/*
+ * Default compareKeys function will work for numbers, strings and dates
+ */
+function defaultCompareKeysFunction (a, b) {
+ if (a < b) { return -1; }
+ if (a > b) { return 1; }
+ if (a === b) { return 0; }
+
+ throw { message: "Couldn't compare elements", a: a, b: b };
+}
+customUtils.defaultCompareKeysFunction = defaultCompareKeysFunction;
+
+
+/**
+ * Check whether two values are equal (used in non-unique deletion)
+ */
+function defaultCheckValueEquality (a, b) {
+ return a === b;
+}
+customUtils.defaultCheckValueEquality = defaultCheckValueEquality;
+
+/**
+ * Simple binary search tree
+ */
+
+
+/**
+ * Constructor
+ * @param {Object} options Optional
+ * @param {Boolean} options.unique Whether to enforce a 'unique' constraint on the key or not
+ * @param {Key} options.key Initialize this BST's key with key
+ * @param {Value} options.value Initialize this BST's data with [value]
+ * @param {Function} options.compareKeys Initialize this BST's compareKeys
+ */
+function BinarySearchTree (options) {
+ options = options || {};
+
+ this.left = null;
+ this.right = null;
+ this.parent = options.parent !== undefined ? options.parent : null;
+ if (options.hasOwnProperty('key')) { this.key = options.key; }
+ this.data = options.hasOwnProperty('value') ? [options.value] : [];
+ this.unique = options.unique || false;
+
+ this.compareKeys = options.compareKeys || customUtils.defaultCompareKeysFunction;
+ this.checkValueEquality = options.checkValueEquality || customUtils.defaultCheckValueEquality;
+}
+
+
+// ================================
+// Methods used to test the tree
+// ================================
+
+
+/**
+ * Get the descendant with max key
+ */
+BinarySearchTree.prototype.getMaxKeyDescendant = function () {
+ if (this.right) {
+ return this.right.getMaxKeyDescendant();
+ } else {
+ return this;
+ }
+};
+
+
+/**
+ * Get the maximum key
+ */
+BinarySearchTree.prototype.getMaxKey = function () {
+ return this.getMaxKeyDescendant().key;
+};
+
+
+/**
+ * Get the descendant with min key
+ */
+BinarySearchTree.prototype.getMinKeyDescendant = function () {
+ if (this.left) {
+ return this.left.getMinKeyDescendant()
+ } else {
+ return this;
+ }
+};
+
+
+/**
+ * Get the minimum key
+ */
+BinarySearchTree.prototype.getMinKey = function () {
+ return this.getMinKeyDescendant().key;
+};
+
+
+/**
+ * Check that all nodes (incl. leaves) fullfil condition given by fn
+ * test is a function passed every (key, data) and which throws if the condition is not met
+ */
+BinarySearchTree.prototype.checkAllNodesFullfillCondition = function (test) {
+ if (!this.hasOwnProperty('key')) { return; }
+
+ test(this.key, this.data);
+ if (this.left) { this.left.checkAllNodesFullfillCondition(test); }
+ if (this.right) { this.right.checkAllNodesFullfillCondition(test); }
+};
+
+
+/**
+ * Check that the core BST properties on node ordering are verified
+ * Throw if they aren't
+ */
+BinarySearchTree.prototype.checkNodeOrdering = function () {
+ var self = this;
+
+ if (!this.hasOwnProperty('key')) { return; }
+
+ if (this.left) {
+ this.left.checkAllNodesFullfillCondition(function (k) {
+ if (self.compareKeys(k, self.key) >= 0) {
+ throw 'Tree with root ' + self.key + ' is not a binary search tree';
+ }
+ });
+ this.left.checkNodeOrdering();
+ }
+
+ if (this.right) {
+ this.right.checkAllNodesFullfillCondition(function (k) {
+ if (self.compareKeys(k, self.key) <= 0) {
+ throw 'Tree with root ' + self.key + ' is not a binary search tree';
+ }
+ });
+ this.right.checkNodeOrdering();
+ }
+};
+
+
+/**
+ * Check that all pointers are coherent in this tree
+ */
+BinarySearchTree.prototype.checkInternalPointers = function () {
+ if (this.left) {
+ if (this.left.parent !== this) { throw 'Parent pointer broken for key ' + this.key; }
+ this.left.checkInternalPointers();
+ }
+
+ if (this.right) {
+ if (this.right.parent !== this) { throw 'Parent pointer broken for key ' + this.key; }
+ this.right.checkInternalPointers();
+ }
+};
+
+
+/**
+ * Check that a tree is a BST as defined here (node ordering and pointer references)
+ */
+BinarySearchTree.prototype.checkIsBST = function () {
+ this.checkNodeOrdering();
+ this.checkInternalPointers();
+ if (this.parent) { throw "The root shouldn't have a parent"; }
+};
+
+
+/**
+ * Get number of keys inserted
+ */
+BinarySearchTree.prototype.getNumberOfKeys = function () {
+ var res;
+
+ if (!this.hasOwnProperty('key')) { return 0; }
+
+ res = 1;
+ if (this.left) { res += this.left.getNumberOfKeys(); }
+ if (this.right) { res += this.right.getNumberOfKeys(); }
+
+ return res;
+};
+
+
+
+// ============================================
+// Methods used to actually work on the tree
+// ============================================
+
+/**
+ * Create a BST similar (i.e. same options except for key and value) to the current one
+ * Use the same constructor (i.e. BinarySearchTree, AVLTree etc)
+ * @param {Object} options see constructor
+ */
+BinarySearchTree.prototype.createSimilar = function (options) {
+ options = options || {};
+ options.unique = this.unique;
+ options.compareKeys = this.compareKeys;
+ options.checkValueEquality = this.checkValueEquality;
+
+ return new this.constructor(options);
+};
+
+
+/**
+ * Create the left child of this BST and return it
+ */
+BinarySearchTree.prototype.createLeftChild = function (options) {
+ var leftChild = this.createSimilar(options);
+ leftChild.parent = this;
+ this.left = leftChild;
+
+ return leftChild;
+};
+
+
+/**
+ * Create the right child of this BST and return it
+ */
+BinarySearchTree.prototype.createRightChild = function (options) {
+ var rightChild = this.createSimilar(options);
+ rightChild.parent = this;
+ this.right = rightChild;
+
+ return rightChild;
+};
+
+
+/**
+ * Insert a new element
+ */
+BinarySearchTree.prototype.insert = function (key, value) {
+ // Empty tree, insert as root
+ if (!this.hasOwnProperty('key')) {
+ this.key = key;
+ this.data.push(value);
+ return;
+ }
+
+ // Same key as root
+ if (this.compareKeys(this.key, key) === 0) {
+ if (this.unique) {
+ throw { message: "Can't insert key " + key + ", it violates the unique constraint"
+ , key: key
+ , errorType: 'uniqueViolated'
+ };
+ } else {
+ this.data.push(value);
+ }
+ return;
+ }
+
+ if (this.compareKeys(key, this.key) < 0) {
+ // Insert in left subtree
+ if (this.left) {
+ this.left.insert(key, value);
+ } else {
+ this.createLeftChild({ key: key, value: value });
+ }
+ } else {
+ // Insert in right subtree
+ if (this.right) {
+ this.right.insert(key, value);
+ } else {
+ this.createRightChild({ key: key, value: value });
+ }
+ }
+};
+
+
+/**
+ * Search for all data corresponding to a key
+ */
+BinarySearchTree.prototype.search = function (key) {
+ if (!this.hasOwnProperty('key')) { return []; }
+
+ if (this.compareKeys(this.key, key) === 0) { return this.data; }
+
+ if (this.compareKeys(key, this.key) < 0) {
+ if (this.left) {
+ return this.left.search(key);
+ } else {
+ return [];
+ }
+ } else {
+ if (this.right) {
+ return this.right.search(key);
+ } else {
+ return [];
+ }
+ }
+};
+
+
+/**
+ * Return a function that tells whether a given key matches a lower bound
+ */
+BinarySearchTree.prototype.getLowerBoundMatcher = function (query) {
+ var self = this;
+
+ // No lower bound
+ if (!query.hasOwnProperty('$gt') && !query.hasOwnProperty('$gte')) {
+ return function () { return true; };
+ }
+
+ if (query.hasOwnProperty('$gt') && query.hasOwnProperty('$gte')) {
+ if (self.compareKeys(query.$gte, query.$gt) === 0) {
+ return function (key) { return self.compareKeys(key, query.$gt) > 0; };
+ }
+
+ if (self.compareKeys(query.$gte, query.$gt) > 0) {
+ return function (key) { return self.compareKeys(key, query.$gte) >= 0; };
+ } else {
+ return function (key) { return self.compareKeys(key, query.$gt) > 0; };
+ }
+ }
+
+ if (query.hasOwnProperty('$gt')) {
+ return function (key) { return self.compareKeys(key, query.$gt) > 0; };
+ } else {
+ return function (key) { return self.compareKeys(key, query.$gte) >= 0; };
+ }
+};
+
+
+/**
+ * Return a function that tells whether a given key matches an upper bound
+ */
+BinarySearchTree.prototype.getUpperBoundMatcher = function (query) {
+ var self = this;
+
+ // No lower bound
+ if (!query.hasOwnProperty('$lt') && !query.hasOwnProperty('$lte')) {
+ return function () { return true; };
+ }
+
+ if (query.hasOwnProperty('$lt') && query.hasOwnProperty('$lte')) {
+ if (self.compareKeys(query.$lte, query.$lt) === 0) {
+ return function (key) { return self.compareKeys(key, query.$lt) < 0; };
+ }
+
+ if (self.compareKeys(query.$lte, query.$lt) < 0) {
+ return function (key) { return self.compareKeys(key, query.$lte) <= 0; };
+ } else {
+ return function (key) { return self.compareKeys(key, query.$lt) < 0; };
+ }
+ }
+
+ if (query.hasOwnProperty('$lt')) {
+ return function (key) { return self.compareKeys(key, query.$lt) < 0; };
+ } else {
+ return function (key) { return self.compareKeys(key, query.$lte) <= 0; };
+ }
+};
+
+
+// Append all elements in toAppend to array
+function append (array, toAppend) {
+ var i;
+
+ for (i = 0; i < toAppend.length; i += 1) {
+ array.push(toAppend[i]);
+ }
+}
+
+
+/**
+ * Get all data for a key between bounds
+ * Return it in key order
+ * @param {Object} query Mongo-style query where keys are $lt, $lte, $gt or $gte (other keys are not considered)
+ * @param {Functions} lbm/ubm matching functions calculated at the first recursive step
+ */
+BinarySearchTree.prototype.betweenBounds = function (query, lbm, ubm) {
+ var res = [];
+
+ if (!this.hasOwnProperty('key')) { return []; } // Empty tree
+
+ lbm = lbm || this.getLowerBoundMatcher(query);
+ ubm = ubm || this.getUpperBoundMatcher(query);
+
+ if (lbm(this.key) && this.left) { append(res, this.left.betweenBounds(query, lbm, ubm)); }
+ if (lbm(this.key) && ubm(this.key)) { append(res, this.data); }
+ if (ubm(this.key) && this.right) { append(res, this.right.betweenBounds(query, lbm, ubm)); }
+
+ return res;
+};
+
+
+/**
+ * Delete the current node if it is a leaf
+ * Return true if it was deleted
+ */
+BinarySearchTree.prototype.deleteIfLeaf = function () {
+ if (this.left || this.right) { return false; }
+
+ // The leaf is itself a root
+ if (!this.parent) {
+ delete this.key;
+ this.data = [];
+ return true;
+ }
+
+ if (this.parent.left === this) {
+ this.parent.left = null;
+ } else {
+ this.parent.right = null;
+ }
+
+ return true;
+};
+
+
+/**
+ * Delete the current node if it has only one child
+ * Return true if it was deleted
+ */
+BinarySearchTree.prototype.deleteIfOnlyOneChild = function () {
+ var child;
+
+ if (this.left && !this.right) { child = this.left; }
+ if (!this.left && this.right) { child = this.right; }
+ if (!child) { return false; }
+
+ // Root
+ if (!this.parent) {
+ this.key = child.key;
+ this.data = child.data;
+
+ this.left = null;
+ if (child.left) {
+ this.left = child.left;
+ child.left.parent = this;
+ }
+
+ this.right = null;
+ if (child.right) {
+ this.right = child.right;
+ child.right.parent = this;
+ }
+
+ return true;
+ }
+
+ if (this.parent.left === this) {
+ this.parent.left = child;
+ child.parent = this.parent;
+ } else {
+ this.parent.right = child;
+ child.parent = this.parent;
+ }
+
+ return true;
+};
+
+
+/**
+ * Delete a key or just a value
+ * @param {Key} key
+ * @param {Value} value Optional. If not set, the whole key is deleted. If set, only this value is deleted
+ */
+BinarySearchTree.prototype.delete = function (key, value) {
+ var newData = [], replaceWith
+ , self = this
+ ;
+
+ if (!this.hasOwnProperty('key')) { return; }
+
+ if (this.compareKeys(key, this.key) < 0) {
+ if (this.left) { this.left.delete(key, value); }
+ return;
+ }
+
+ if (this.compareKeys(key, this.key) > 0) {
+ if (this.right) { this.right.delete(key, value); }
+ return;
+ }
+
+ if (!this.compareKeys(key, this.key) === 0) { return; }
+
+ // Delete only a value
+ if (this.data.length > 1 && value !== undefined) {
+ this.data.forEach(function (d) {
+ if (!self.checkValueEquality(d, value)) { newData.push(d); }
+ });
+ self.data = newData;
+ return;
+ }
+
+ // Delete the whole node
+ if (this.deleteIfLeaf()) {
+ return;
+ }
+ if (this.deleteIfOnlyOneChild()) {
+ return;
+ }
+
+ // We are in the case where the node to delete has two children
+ if (Math.random() >= 0.5) { // Randomize replacement to avoid unbalancing the tree too much
+ // Use the in-order predecessor
+ replaceWith = this.left.getMaxKeyDescendant();
+
+ this.key = replaceWith.key;
+ this.data = replaceWith.data;
+
+ if (this === replaceWith.parent) { // Special case
+ this.left = replaceWith.left;
+ if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; }
+ } else {
+ replaceWith.parent.right = replaceWith.left;
+ if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; }
+ }
+ } else {
+ // Use the in-order successor
+ replaceWith = this.right.getMinKeyDescendant();
+
+ this.key = replaceWith.key;
+ this.data = replaceWith.data;
+
+ if (this === replaceWith.parent) { // Special case
+ this.right = replaceWith.right;
+ if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; }
+ } else {
+ replaceWith.parent.left = replaceWith.right;
+ if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; }
+ }
+ }
+};
+
+
+/**
+ * Execute a function on every node of the tree, in key order
+ * @param {Function} fn Signature: node. Most useful will probably be node.key and node.data
+ */
+BinarySearchTree.prototype.executeOnEveryNode = function (fn) {
+ if (this.left) { this.left.executeOnEveryNode(fn); }
+ fn(this);
+ if (this.right) { this.right.executeOnEveryNode(fn); }
+};
+
+
+/**
+ * Pretty print a tree
+ * @param {Boolean} printData To print the nodes' data along with the key
+ */
+BinarySearchTree.prototype.prettyPrint = function (printData, spacing) {
+ spacing = spacing || "";
+
+ console.log(spacing + "* " + this.key);
+ if (printData) { console.log(spacing + "* " + this.data); }
+
+ if (!this.left && !this.right) { return; }
+
+ if (this.left) {
+ this.left.prettyPrint(printData, spacing + " ");
+ } else {
+ console.log(spacing + " *");
+ }
+ if (this.right) {
+ this.right.prettyPrint(printData, spacing + " ");
+ } else {
+ console.log(spacing + " *");
+ }
+};
+
+/**
+* Created by Hanwen on 01.07.2014.
+*/
+
+/**
+ * Constructor
+ * @param {Object} [options] config map
+ * @param {Number} options.minLength the shortest length of the fragment.
+ * @param {Number} options.minOccurrence the minimum occurrence of the fragment.
+ * @param {Boolean} options.debug whether show the console message and set the minLength and minOccurrence.
+ * @constructor
+ */
+function SuffixTrie(options) {
+ this.options = options || {};
+
+ this.structure = {};
+ this.debug = this.options.debug === undefined ? false : options.debug;
+
+
+ this.minLENGTH = this.options.minLength === undefined ? 3 : options.minLength;
+ this.minOccurrence = this.options.minOccurrence === undefined ? 2 : options.minOccurrence;
+ this.isByLength = this.options.byLength === undefined ? false : options.byLength;
+
+ //this.options.limit is a number
+ this.save = [];
+ this.array = null;
+ this.labelArray = null;
+ this.fragmentsArray = null;
+ this.fragmentTrie = {};
+ this.rebuildArray;
+ //default test environment
+
+}
+
+//help function to clear the trie
+SuffixTrie.prototype.refresh = function(){
+ this.structure = {};
+ this.save = [];
+ this.array = null;
+ this.labelArray = null;
+ this.fragmentsArray = null;
+ this.fragmentTrie = {};
+ this.rebuildArray;
+};
+
+
+//help function for quick weight
+SuffixTrie.prototype.weigh = function(){
+ if(this.isByLength){
+ return this.weighByMax()
+ }else{
+ return this.weighByAverage()
+ }
+}
+
+// ======================================================================================================================
+// ==================================================== Public Functions ================================================
+// ======================================================================================================================
+/**
+ * add fragment string to the trie
+ * @param array {Array} adding word
+ */
+SuffixTrie.prototype.build = function (array) {
+ var debug = this.debug;
+ this.buildLabelTrie(array);
+ this.optimize(this.structure);
+ if(debug) console.log('after optimization our label trie of array length ' + this.array.length + ' is ', this.structure);
+
+ this.listLabel();
+ if(debug) console.log('get the compressed label array (without duplicate fragments ) of array length ' + this.array.length + ' is ', this.labelArray);
+ if(debug) console.log('and fragments array of length ' + this.fragmentsArray.length + ' is ', this.fragmentsArray);
+
+ this.clearRedundantFragment();
+ if(debug) console.log('get the cleared label array (without duplicate fragments ) of array length ' + this.array.length + ' is ', this.labelArray);
+ if(debug) console.log('and cleared fragments array of length ' + this.fragmentsArray.length + ' is ', this.fragmentsArray);
+
+ this.rebuild();
+ if(debug) console.log('rebuild ended' , this.rebuildArray);
+};
+
+SuffixTrie.prototype.buildLabelTrie = function (array) {
+ this.array = array;
+ var root = this.structure;
+ var LENGTH = this.minLENGTH;
+
+ array.forEach(function (word, index) {
+ var first = true;
+ //used to store the loop element, for connect the suffixes, e.g. 'foob' next is 'oob', by this connection, we get all the substrings.
+ var last = {};
+
+ //loop every suffix in one word, e.g. 'oss' in 'boss'
+ for (var i = 0, l = word.length; i <= l - LENGTH; i++) {
+ var cur = root;
+ var suffix = word.substring(i);
+ var letters = suffix.split("");
+
+ //loop every letter in the suffix, e.g. 'o' in 'oss'
+ for (var j = 0; j < letters.length; j++) {
+ var letter = letters[j], pos = cur[ letter ];
+
+ if (j === letters.length - 1) {
+ if (pos == null) {
+ //create new node and add the information.
+ cur[letter] = {source: [index], listed: false};
+ } else if (pos.hasOwnProperty('source')) {
+ //just add occurrence
+ pos["source"].push(index);
+ } else {
+ //node already existed, add information.
+ cur[letter]['source'] = [index];
+ cur[letter]['listed'] = false;
+ }
+ cur = cur[letter];
+
+ if (!first) {
+ last['next'] = suffix;
+ } else {
+ first = false;
+ }
+
+ last = cur;
+ } else if (pos == null) {
+ //create node
+ cur = cur[letter] = {};
+ } else {
+ //no creation, loop to next node
+ cur = cur[ letter ];
+ }
+ }
+ }
+ });
+
+};
+
+/**
+ * add the origin for the split node and trunk node && integrate prefix in each branch
+ * @param {Object} root the start node
+ * @param {Number} [rootLevel] the start level of the algorithm, should be -1
+ * @returns {Array} array with the origin of the word
+ *
+ */
+SuffixTrie.prototype.optimize = function (root, rootLevel) {
+ var occurrence = this.minOccurrence;
+ //self origin indexes
+ var self_save = [];
+ rootLevel = rootLevel === undefined ? -1 : rootLevel;
+ rootLevel++;
+ //whether the word is long enough. ignored the short part.
+ var is_allowed = rootLevel >= this.minLENGTH;
+
+ for (var child in root) {
+
+ if (root.hasOwnProperty(child)) { //&& child !== 'next' && child !== 'level'
+ //loop to new child and combine the index information
+ if (child.length === 1) {
+
+ // returned indexes from children (iterate from the leaf)
+ var children_save = this.optimize(root[child], rootLevel);
+ if (is_allowed) {
+ self_save = self_save.concat(children_save);
+ }
+ } else if (child === 'source' && is_allowed) {
+ self_save = self_save.concat(root['source']);
+ }
+ }
+ }
+
+ // this part is to test if the fragment fulfil the occurrence requirement, delete if not, which reduce time significantly.
+ self_save = uniqueArray(self_save);
+ var isEnoughOccurred = self_save.length >= occurrence;
+ is_allowed = is_allowed && isEnoughOccurred;
+
+ if (is_allowed) {
+ //leaf will at least has property of 'listed' and 'next'
+ var is_SoleNode = Object.keys(root).length === 1;
+ if (!is_SoleNode) {
+ //this is a split node or a flaged node, add information include source here
+ root['source'] = self_save; //update it
+ root['level'] = rootLevel;
+ root['listed'] = false; //indicate this node should be check
+ root['weight'] = self_save.length * rootLevel;
+ self_save = []; //clear the array: this is important if we don't want the node with former indexes!
+ } else {
+ //this is just a sole node with one child, do nothing
+ }
+ }
+ else {
+ //it is lower than the request level , or it is a leaf node, so not calculate here
+ delete root['source'];
+ }
+
+ return self_save;
+};
+
+/**
+ * list the repeat part with certain order && integrate suffix in 'next' branch
+ * @param {Number} [start] start point in the array [0 - array.length]
+ */
+SuffixTrie.prototype.listLabel = function (start) {
+ var array = this.array;
+ var root = this.structure;
+ var label_array = [];
+ var fragments_array = [];
+ var length = this.minLENGTH;
+ var occurrence = this.minOccurrence;
+ start = start === undefined ? 1 : start;
+
+ //loop from the certain index, lead to different rebuild array.
+ for (var index = start - 1, i = 0; i < array.length; index++, index = index % (array.length), i++) {
+ var word = array[index];
+
+ var fragments = {};
+ //skip the short word, just push the empty object.
+ if (word.length < length) {
+ label_array.push(fragments);
+ continue;
+ }
+
+ findFragments(word, fragments, root, length, occurrence, false);
+
+ //accumulate the fragment to another array.
+ for (var fragment in fragments) {
+ if (fragments.hasOwnProperty(fragment)) {
+ //get all the origin label index and push them into fragment array with default order
+ var fragmentsArrayIndex = fragments_array.push(fragments[fragment]) - 1;
+ fragments_array[fragmentsArrayIndex]['name'] = fragment;
+ }
+ }
+
+ //build another array to map each label and all of its fragments.
+ label_array.push(fragments);
+ }
+ this.labelArray = label_array;
+ this.fragmentsArray = fragments_array;
+ return label_array;
+};
+
+/**
+ * rebuild the label array, make sure every label has all of its own fragments
+ */
+SuffixTrie.prototype.rebuild = function () {
+ var rebuildArray = JSON.parse(JSON.stringify(this.labelArray));//deep copy array
+ rebuildArray.forEach(function (object, index) {
+ for (var fragment in object) {
+ if (object.hasOwnProperty(fragment)) {
+ object[fragment]['source'].forEach(function (labelIndex) {
+ if (labelIndex > index) {
+ rebuildArray[labelIndex][fragment] = object[fragment];
+ }
+ });
+ }
+ }
+ });
+ this.rebuildArray = rebuildArray;
+};
+
+/**
+ * weight the fragments and order them by the product of their occurrence and length,
+ * should use this function after the build and rebuild.
+ */
+SuffixTrie.prototype.weighByAverage = function () {
+ var debug = this.debug;
+ var fragmentsArray = this.fragmentsArray;
+ var fragments_Num = this.fragmentsArray.length;
+ fragmentsArray.sort(function (f1, f2) {
+ return f2['weight'] - f1['weight'];
+ });
+ if (debug) console.log('weigh by average: result of length ' + fragments_Num + ' is', fragmentsArray);
+
+ if(typeof this.options.limit == "number")
+ fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num;
+
+ return fragmentsArray.slice(0, fragments_Num);
+};
+
+/**
+ * weight the fragments and order them by max label length.
+ */
+SuffixTrie.prototype.weighByMax = function () {
+ var debug = this.debug;
+
+ buildFragmentTrie(this.fragmentTrie, this.fragmentsArray);
+
+ var rebuildArray = JSON.parse(JSON.stringify(this.rebuildArray));
+// var rebuildArray = this.rebuildArray;
+ var fragmentsArray = [];
+ var fragmentsTrie = this.fragmentTrie;
+ var fragments_Num = this.fragmentsArray.length;
+ var label_Lengths = this.array.map(function (s) {
+ return s.length;
+ });
+
+ var label_Tree = new BinarySearchTree();
+ label_Lengths.forEach(function (length, index) {
+ label_Tree.insert(length, index);
+ });
+
+ if (typeof this.options.limit == "number")
+ fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num;
+
+ while (fragmentsArray.length < fragments_Num) {
+ var maxKey = label_Tree.getMaxKey();
+
+ var labels_In_Key = label_Tree.search(maxKey);
+
+ if (labels_In_Key.length === 0) {
+ label_Tree.delete(maxKey);
+ continue;
+ }
+ //choose a random labels index with the biggest length
+ var longest_Label = labels_In_Key[0];
+
+ //get all the fragments in this label
+ //in case the there is no fragments under it.
+ var fragments = Object.keys(rebuildArray[longest_Label]);
+ if (fragments.length > 0) {
+ //then find the longest fragment
+ fragments.sort(function (f1, f2) {
+ return f2.length - f1.length;
+ });
+ removeTrie(fragments[0], fragmentsTrie, fragmentsArray, label_Lengths, rebuildArray, label_Tree);
+ }
+ //in case there is no this label in fragments array -- which could be possible
+ label_Tree.delete(maxKey, longest_Label);
+ }
+
+ this.fragmentsArray = fragmentsArray;
+ if (debug) console.log('weigh by max: result of length ' + fragmentsArray.length + ' is', fragmentsArray);
+
+ return fragmentsArray.slice(0, fragments_Num);
+};
+
+/**
+ * delete the redundant fragment, always keep the longer one.
+ */
+SuffixTrie.prototype.clearRedundantFragment = function () {
+ var fragments = this.fragmentsArray;
+ var occurrence = this.minOccurrence;
+ var newFragmentArray = [];
+ var labelArray = this.labelArray;
+ //sort the fragments array from short to long.
+ fragments.sort(function (a, b) {
+ return a.name.length - b.name.length;
+ });
+
+ fragments.forEach(function (fragment, index) {
+ //backup fragment's occurrence.
+ var backupsource = fragment.source.slice();
+ //check if the longer contain duplicated occurrence, filter such occurrence
+ for (var i = index + 1; i < fragments.length; i++) {
+ var longerFragment = fragments[i];
+ if (longerFragment.name.indexOf(fragment.name) !== -1) {
+ fragment.source = fragment.source.filter(function (i) {
+ return this.indexOf(i) === -1;
+ }, longerFragment.source);
+ }
+ }
+
+ //build the new fragment array with no duplication
+ if (fragment.source.length >= occurrence) {
+ newFragmentArray.push(fragment);
+ }
+ else {
+ if (backupsource.length !== fragment.source.length) {
+ deleteFragmentInLabelArray(backupsource, fragment.name, labelArray);
+ }
+ }
+ });
+ this.fragmentsArray = newFragmentArray;
+};
+
+// ======================================================================================================================
+// ==================================================== Helper Functions ================================================
+// ======================================================================================================================
+
+/**
+ * Most Important Helper function
+ * help function to find the all the fragment of one word from the root of the tree, and finally link to another suffix
+ * @param word searching objects
+ * @param fragments an object to store the sources of the common fragments in the path to find the word
+ * @param root start point
+ * @param length minimum length requirement
+ * @param occurrence minimum occurrence requirement
+ * @param [iterate] {Boolean} whether it is the complete word or suffix
+ */
+function findFragments(word, fragments, root, length, occurrence, iterate) {
+
+ var cur = root;
+ var letters = word.split("");
+ //loop every letter in the suffix, e.g. 'o' in 'oss'
+ for (var j = 0; j < letters.length; j++) {
+ var letter = letters[j], pos = cur[ letter ];
+ if (j + 1 >= length) {
+ var fragment = word.substring(0, j + 1);
+
+ if (pos.hasOwnProperty('listed')) {
+ if (!pos['listed']) {
+ //if it is a node fulfil our requirement
+ if (pos.hasOwnProperty('source')) {
+ fragments[fragment] = {};
+ fragments[fragment]['source'] = pos.source;
+ fragments[fragment]['weight'] = pos['weight'];
+
+ //debug whether we include all the situation.
+ if (pos['weight'] == null) {
+ console.warn('escape case! fragment does not has weight property, node is ', fragments[fragment]);
+ }
+ if (fragments.hasOwnProperty(fragment)) {
+ //if this is not the full word (but a suffix) we check the duplication
+ if (iterate) {
+ for (var property in fragments) {
+ if (property !== fragment && fragments.hasOwnProperty(property) && fragments.hasOwnProperty(fragment)) {
+ if (property.indexOf(fragment) !== -1) {
+ fragments[fragment]['source'] = fragments[fragment]['source'].filter(function (i) {
+ return this.indexOf(i) < 0
+ }, fragments[property].source);
+ }
+ }
+ }
+
+ if (fragments[fragment]['source'].length < occurrence) {
+ delete fragments[fragment];
+ }
+ }
+ }
+ pos['listed'] = true;
+ }
+ //iterate to its suffix from root
+ if (pos.hasOwnProperty('next')) {
+ findFragments(pos['next'], fragments, root, length, occurrence, true);
+ }
+ }
+ }
+ }
+ cur = pos;
+ }
+}
+
+/**
+ * A helper function to delete the reference in Label Array
+ * @param {Array} source fragment.source
+ * @param {String} name fragment.name
+ * @param labelArray LabelArray
+ */
+function deleteFragmentInLabelArray(source, name, labelArray) {
+ var label = labelArray[source[0]];
+ if (label.hasOwnProperty(name)) {
+ delete label[name];
+ }
+}
+
+/**
+ * Helper function for build a dictionary trie for searching the fragment.
+ * @param trie the fragment trie will be build in this variable.
+ * @param array the fragment array used for building trie.
+ */
+function buildFragmentTrie(trie, array) {
+ for (var i = 0; i < array.length; i++) {
+
+ var word = array[i]['name'], letters = word.split(""), cur = trie;
+
+ // Loop through the letters
+ for (var j = 0; j < letters.length; j++) {
+ var letter = letters[j];
+
+ if (!cur.hasOwnProperty(letter)) {
+ cur[ letter ] = {};
+ }
+ cur = cur[letter];
+ if (j === letters.length - 1) {
+ cur['source'] = array[i]['source'];
+ cur['name'] = array[i]['name'];
+ cur['weight'] = array[i]['weight'];
+ cur['list'] = false;
+ }
+
+ }
+ }
+}
+
+/**
+ * Helper function to remove the fragment from the trie and list it in the result array (fragmentsArray).
+ * @param word the word to be searched for.
+ * @param cur the node where the search start.
+ * @param array the array to store the result fragment
+ * @param labelLengthArray the array map to the length of the label
+ * @param rebuildArray this.rebuildArray
+ * @param label_trie the binary search trie where store all the labels with their length.
+ * @returns {Number|Boolean} if find return the length of fragment, else return false.
+ */
+function removeTrie(word, cur, array, labelLengthArray, rebuildArray, label_trie) {
+ for (var node in cur) {
+
+ if (cur.hasOwnProperty(node) && node.length === 1) {
+
+ if (word.indexOf(node) === 0) {
+ //if this is the leaf of the trie
+ if (word.length === 1) {
+ if (cur[node].hasOwnProperty('list')) {
+ var fragment = {};
+ fragment['source'] = cur[node]['source'];
+ var fragmentName = cur[node]['name'];
+ fragment['name'] = fragmentName;
+ fragment['weight'] = cur[node]['weight'];
+ delete cur[node]['list'];
+ //store it into our result
+ array.push(fragment);
+
+ //for other node who has same fragment
+ cur[node]['source'].forEach(function (indexLabel) {
+ //delete the fragment reference in the label
+ delete rebuildArray[indexLabel][fragmentName];
+ //update the label length in the trie
+ var labelLength = labelLengthArray[indexLabel];
+ label_trie.delete(labelLength, indexLabel);
+ labelLength -= fragmentName.length;
+ label_trie.insert(labelLength, indexLabel);
+ });
+ }
+ } else {
+ removeTrie(word.slice(1), cur[node], array, labelLengthArray, rebuildArray, label_trie);
+ }
+ }
+
+ }
+ }
+}
+
+/**
+ * Helper function to remove the repeat elements in an array
+ * @param {Array} array
+ * @returns {Array} the result array without duplicate elments
+ */
+function uniqueArray(array) {
+ var a = array.concat();
+ for (var i = 0; i < a.length; ++i) {
+ for (var j = i + 1; j < a.length; ++j) {
+ if (a[i] === a[j])
+ a.splice(j--, 1);
+ }
+ }
+ return a;
+}
+
+window.Substrings = SuffixTrie;
+
+})();
« no previous file with comments | « appengine/swarming/elements/third_party/README.md ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698