Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: appengine/swarming/elements/third_party/common-strings.js

Issue 2381853003: Add bot-page summary with utilization stats (Closed) Base URL: git@github.com:luci/luci-py@page-everywhere
Patch Set: rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/swarming/elements/third_party/README.md ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 (function(){
jcgregorio 2016/10/03 15:15:27 What is the license for these libraries? Why is t
2
3 var customUtils = {};
4 /**
5 * Return an array with the numbers from 0 to n-1, in a random order
6 */
7 function getRandomArray (n) {
8 var res, next;
9
10 if (n === 0) { return []; }
11 if (n === 1) { return [0]; }
12
13 res = getRandomArray(n - 1);
14 next = Math.floor(Math.random() * n);
15 res.splice(next, 0, n - 1); // Add n-1 at a random position in the array
16
17 return res;
18 };
19 customUtils.getRandomArray = getRandomArray;
20
21
22 /*
23 * Default compareKeys function will work for numbers, strings and dates
24 */
25 function defaultCompareKeysFunction (a, b) {
26 if (a < b) { return -1; }
27 if (a > b) { return 1; }
28 if (a === b) { return 0; }
29
30 throw { message: "Couldn't compare elements", a: a, b: b };
31 }
32 customUtils.defaultCompareKeysFunction = defaultCompareKeysFunction;
33
34
35 /**
36 * Check whether two values are equal (used in non-unique deletion)
37 */
38 function defaultCheckValueEquality (a, b) {
39 return a === b;
40 }
41 customUtils.defaultCheckValueEquality = defaultCheckValueEquality;
42
43 /**
44 * Simple binary search tree
45 */
46
47
48 /**
49 * Constructor
50 * @param {Object} options Optional
51 * @param {Boolean} options.unique Whether to enforce a 'unique' constraint on the key or not
52 * @param {Key} options.key Initialize this BST's key with key
53 * @param {Value} options.value Initialize this BST's data with [value]
54 * @param {Function} options.compareKeys Initialize this BST's compareKeys
55 */
56 function BinarySearchTree (options) {
57 options = options || {};
58
59 this.left = null;
60 this.right = null;
61 this.parent = options.parent !== undefined ? options.parent : null;
62 if (options.hasOwnProperty('key')) { this.key = options.key; }
63 this.data = options.hasOwnProperty('value') ? [options.value] : [];
64 this.unique = options.unique || false;
65
66 this.compareKeys = options.compareKeys || customUtils.defaultCompareKeysFuncti on;
67 this.checkValueEquality = options.checkValueEquality || customUtils.defaultChe ckValueEquality;
68 }
69
70
71 // ================================
72 // Methods used to test the tree
73 // ================================
74
75
76 /**
77 * Get the descendant with max key
78 */
79 BinarySearchTree.prototype.getMaxKeyDescendant = function () {
80 if (this.right) {
81 return this.right.getMaxKeyDescendant();
82 } else {
83 return this;
84 }
85 };
86
87
88 /**
89 * Get the maximum key
90 */
91 BinarySearchTree.prototype.getMaxKey = function () {
92 return this.getMaxKeyDescendant().key;
93 };
94
95
96 /**
97 * Get the descendant with min key
98 */
99 BinarySearchTree.prototype.getMinKeyDescendant = function () {
100 if (this.left) {
101 return this.left.getMinKeyDescendant()
102 } else {
103 return this;
104 }
105 };
106
107
108 /**
109 * Get the minimum key
110 */
111 BinarySearchTree.prototype.getMinKey = function () {
112 return this.getMinKeyDescendant().key;
113 };
114
115
116 /**
117 * Check that all nodes (incl. leaves) fullfil condition given by fn
118 * test is a function passed every (key, data) and which throws if the condition is not met
119 */
120 BinarySearchTree.prototype.checkAllNodesFullfillCondition = function (test) {
121 if (!this.hasOwnProperty('key')) { return; }
122
123 test(this.key, this.data);
124 if (this.left) { this.left.checkAllNodesFullfillCondition(test); }
125 if (this.right) { this.right.checkAllNodesFullfillCondition(test); }
126 };
127
128
129 /**
130 * Check that the core BST properties on node ordering are verified
131 * Throw if they aren't
132 */
133 BinarySearchTree.prototype.checkNodeOrdering = function () {
134 var self = this;
135
136 if (!this.hasOwnProperty('key')) { return; }
137
138 if (this.left) {
139 this.left.checkAllNodesFullfillCondition(function (k) {
140 if (self.compareKeys(k, self.key) >= 0) {
141 throw 'Tree with root ' + self.key + ' is not a binary search tree';
142 }
143 });
144 this.left.checkNodeOrdering();
145 }
146
147 if (this.right) {
148 this.right.checkAllNodesFullfillCondition(function (k) {
149 if (self.compareKeys(k, self.key) <= 0) {
150 throw 'Tree with root ' + self.key + ' is not a binary search tree';
151 }
152 });
153 this.right.checkNodeOrdering();
154 }
155 };
156
157
158 /**
159 * Check that all pointers are coherent in this tree
160 */
161 BinarySearchTree.prototype.checkInternalPointers = function () {
162 if (this.left) {
163 if (this.left.parent !== this) { throw 'Parent pointer broken for key ' + th is.key; }
164 this.left.checkInternalPointers();
165 }
166
167 if (this.right) {
168 if (this.right.parent !== this) { throw 'Parent pointer broken for key ' + t his.key; }
169 this.right.checkInternalPointers();
170 }
171 };
172
173
174 /**
175 * Check that a tree is a BST as defined here (node ordering and pointer referen ces)
176 */
177 BinarySearchTree.prototype.checkIsBST = function () {
178 this.checkNodeOrdering();
179 this.checkInternalPointers();
180 if (this.parent) { throw "The root shouldn't have a parent"; }
181 };
182
183
184 /**
185 * Get number of keys inserted
186 */
187 BinarySearchTree.prototype.getNumberOfKeys = function () {
188 var res;
189
190 if (!this.hasOwnProperty('key')) { return 0; }
191
192 res = 1;
193 if (this.left) { res += this.left.getNumberOfKeys(); }
194 if (this.right) { res += this.right.getNumberOfKeys(); }
195
196 return res;
197 };
198
199
200
201 // ============================================
202 // Methods used to actually work on the tree
203 // ============================================
204
205 /**
206 * Create a BST similar (i.e. same options except for key and value) to the curr ent one
207 * Use the same constructor (i.e. BinarySearchTree, AVLTree etc)
208 * @param {Object} options see constructor
209 */
210 BinarySearchTree.prototype.createSimilar = function (options) {
211 options = options || {};
212 options.unique = this.unique;
213 options.compareKeys = this.compareKeys;
214 options.checkValueEquality = this.checkValueEquality;
215
216 return new this.constructor(options);
217 };
218
219
220 /**
221 * Create the left child of this BST and return it
222 */
223 BinarySearchTree.prototype.createLeftChild = function (options) {
224 var leftChild = this.createSimilar(options);
225 leftChild.parent = this;
226 this.left = leftChild;
227
228 return leftChild;
229 };
230
231
232 /**
233 * Create the right child of this BST and return it
234 */
235 BinarySearchTree.prototype.createRightChild = function (options) {
236 var rightChild = this.createSimilar(options);
237 rightChild.parent = this;
238 this.right = rightChild;
239
240 return rightChild;
241 };
242
243
244 /**
245 * Insert a new element
246 */
247 BinarySearchTree.prototype.insert = function (key, value) {
248 // Empty tree, insert as root
249 if (!this.hasOwnProperty('key')) {
250 this.key = key;
251 this.data.push(value);
252 return;
253 }
254
255 // Same key as root
256 if (this.compareKeys(this.key, key) === 0) {
257 if (this.unique) {
258 throw { message: "Can't insert key " + key + ", it violates the unique con straint"
259 , key: key
260 , errorType: 'uniqueViolated'
261 };
262 } else {
263 this.data.push(value);
264 }
265 return;
266 }
267
268 if (this.compareKeys(key, this.key) < 0) {
269 // Insert in left subtree
270 if (this.left) {
271 this.left.insert(key, value);
272 } else {
273 this.createLeftChild({ key: key, value: value });
274 }
275 } else {
276 // Insert in right subtree
277 if (this.right) {
278 this.right.insert(key, value);
279 } else {
280 this.createRightChild({ key: key, value: value });
281 }
282 }
283 };
284
285
286 /**
287 * Search for all data corresponding to a key
288 */
289 BinarySearchTree.prototype.search = function (key) {
290 if (!this.hasOwnProperty('key')) { return []; }
291
292 if (this.compareKeys(this.key, key) === 0) { return this.data; }
293
294 if (this.compareKeys(key, this.key) < 0) {
295 if (this.left) {
296 return this.left.search(key);
297 } else {
298 return [];
299 }
300 } else {
301 if (this.right) {
302 return this.right.search(key);
303 } else {
304 return [];
305 }
306 }
307 };
308
309
310 /**
311 * Return a function that tells whether a given key matches a lower bound
312 */
313 BinarySearchTree.prototype.getLowerBoundMatcher = function (query) {
314 var self = this;
315
316 // No lower bound
317 if (!query.hasOwnProperty('$gt') && !query.hasOwnProperty('$gte')) {
318 return function () { return true; };
319 }
320
321 if (query.hasOwnProperty('$gt') && query.hasOwnProperty('$gte')) {
322 if (self.compareKeys(query.$gte, query.$gt) === 0) {
323 return function (key) { return self.compareKeys(key, query.$gt) > 0; };
324 }
325
326 if (self.compareKeys(query.$gte, query.$gt) > 0) {
327 return function (key) { return self.compareKeys(key, query.$gte) >= 0; };
328 } else {
329 return function (key) { return self.compareKeys(key, query.$gt) > 0; };
330 }
331 }
332
333 if (query.hasOwnProperty('$gt')) {
334 return function (key) { return self.compareKeys(key, query.$gt) > 0; };
335 } else {
336 return function (key) { return self.compareKeys(key, query.$gte) >= 0; };
337 }
338 };
339
340
341 /**
342 * Return a function that tells whether a given key matches an upper bound
343 */
344 BinarySearchTree.prototype.getUpperBoundMatcher = function (query) {
345 var self = this;
346
347 // No lower bound
348 if (!query.hasOwnProperty('$lt') && !query.hasOwnProperty('$lte')) {
349 return function () { return true; };
350 }
351
352 if (query.hasOwnProperty('$lt') && query.hasOwnProperty('$lte')) {
353 if (self.compareKeys(query.$lte, query.$lt) === 0) {
354 return function (key) { return self.compareKeys(key, query.$lt) < 0; };
355 }
356
357 if (self.compareKeys(query.$lte, query.$lt) < 0) {
358 return function (key) { return self.compareKeys(key, query.$lte) <= 0; };
359 } else {
360 return function (key) { return self.compareKeys(key, query.$lt) < 0; };
361 }
362 }
363
364 if (query.hasOwnProperty('$lt')) {
365 return function (key) { return self.compareKeys(key, query.$lt) < 0; };
366 } else {
367 return function (key) { return self.compareKeys(key, query.$lte) <= 0; };
368 }
369 };
370
371
372 // Append all elements in toAppend to array
373 function append (array, toAppend) {
374 var i;
375
376 for (i = 0; i < toAppend.length; i += 1) {
377 array.push(toAppend[i]);
378 }
379 }
380
381
382 /**
383 * Get all data for a key between bounds
384 * Return it in key order
385 * @param {Object} query Mongo-style query where keys are $lt, $lte, $gt or $gte (other keys are not considered)
386 * @param {Functions} lbm/ubm matching functions calculated at the first recursi ve step
387 */
388 BinarySearchTree.prototype.betweenBounds = function (query, lbm, ubm) {
389 var res = [];
390
391 if (!this.hasOwnProperty('key')) { return []; } // Empty tree
392
393 lbm = lbm || this.getLowerBoundMatcher(query);
394 ubm = ubm || this.getUpperBoundMatcher(query);
395
396 if (lbm(this.key) && this.left) { append(res, this.left.betweenBounds(query, l bm, ubm)); }
397 if (lbm(this.key) && ubm(this.key)) { append(res, this.data); }
398 if (ubm(this.key) && this.right) { append(res, this.right.betweenBounds(query, lbm, ubm)); }
399
400 return res;
401 };
402
403
404 /**
405 * Delete the current node if it is a leaf
406 * Return true if it was deleted
407 */
408 BinarySearchTree.prototype.deleteIfLeaf = function () {
409 if (this.left || this.right) { return false; }
410
411 // The leaf is itself a root
412 if (!this.parent) {
413 delete this.key;
414 this.data = [];
415 return true;
416 }
417
418 if (this.parent.left === this) {
419 this.parent.left = null;
420 } else {
421 this.parent.right = null;
422 }
423
424 return true;
425 };
426
427
428 /**
429 * Delete the current node if it has only one child
430 * Return true if it was deleted
431 */
432 BinarySearchTree.prototype.deleteIfOnlyOneChild = function () {
433 var child;
434
435 if (this.left && !this.right) { child = this.left; }
436 if (!this.left && this.right) { child = this.right; }
437 if (!child) { return false; }
438
439 // Root
440 if (!this.parent) {
441 this.key = child.key;
442 this.data = child.data;
443
444 this.left = null;
445 if (child.left) {
446 this.left = child.left;
447 child.left.parent = this;
448 }
449
450 this.right = null;
451 if (child.right) {
452 this.right = child.right;
453 child.right.parent = this;
454 }
455
456 return true;
457 }
458
459 if (this.parent.left === this) {
460 this.parent.left = child;
461 child.parent = this.parent;
462 } else {
463 this.parent.right = child;
464 child.parent = this.parent;
465 }
466
467 return true;
468 };
469
470
471 /**
472 * Delete a key or just a value
473 * @param {Key} key
474 * @param {Value} value Optional. If not set, the whole key is deleted. If set, only this value is deleted
475 */
476 BinarySearchTree.prototype.delete = function (key, value) {
477 var newData = [], replaceWith
478 , self = this
479 ;
480
481 if (!this.hasOwnProperty('key')) { return; }
482
483 if (this.compareKeys(key, this.key) < 0) {
484 if (this.left) { this.left.delete(key, value); }
485 return;
486 }
487
488 if (this.compareKeys(key, this.key) > 0) {
489 if (this.right) { this.right.delete(key, value); }
490 return;
491 }
492
493 if (!this.compareKeys(key, this.key) === 0) { return; }
494
495 // Delete only a value
496 if (this.data.length > 1 && value !== undefined) {
497 this.data.forEach(function (d) {
498 if (!self.checkValueEquality(d, value)) { newData.push(d); }
499 });
500 self.data = newData;
501 return;
502 }
503
504 // Delete the whole node
505 if (this.deleteIfLeaf()) {
506 return;
507 }
508 if (this.deleteIfOnlyOneChild()) {
509 return;
510 }
511
512 // We are in the case where the node to delete has two children
513 if (Math.random() >= 0.5) { // Randomize replacement to avoid unbalancing th e tree too much
514 // Use the in-order predecessor
515 replaceWith = this.left.getMaxKeyDescendant();
516
517 this.key = replaceWith.key;
518 this.data = replaceWith.data;
519
520 if (this === replaceWith.parent) { // Special case
521 this.left = replaceWith.left;
522 if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; }
523 } else {
524 replaceWith.parent.right = replaceWith.left;
525 if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; }
526 }
527 } else {
528 // Use the in-order successor
529 replaceWith = this.right.getMinKeyDescendant();
530
531 this.key = replaceWith.key;
532 this.data = replaceWith.data;
533
534 if (this === replaceWith.parent) { // Special case
535 this.right = replaceWith.right;
536 if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; }
537 } else {
538 replaceWith.parent.left = replaceWith.right;
539 if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; }
540 }
541 }
542 };
543
544
545 /**
546 * Execute a function on every node of the tree, in key order
547 * @param {Function} fn Signature: node. Most useful will probably be node.key a nd node.data
548 */
549 BinarySearchTree.prototype.executeOnEveryNode = function (fn) {
550 if (this.left) { this.left.executeOnEveryNode(fn); }
551 fn(this);
552 if (this.right) { this.right.executeOnEveryNode(fn); }
553 };
554
555
556 /**
557 * Pretty print a tree
558 * @param {Boolean} printData To print the nodes' data along with the key
559 */
560 BinarySearchTree.prototype.prettyPrint = function (printData, spacing) {
561 spacing = spacing || "";
562
563 console.log(spacing + "* " + this.key);
564 if (printData) { console.log(spacing + "* " + this.data); }
565
566 if (!this.left && !this.right) { return; }
567
568 if (this.left) {
569 this.left.prettyPrint(printData, spacing + " ");
570 } else {
571 console.log(spacing + " *");
572 }
573 if (this.right) {
574 this.right.prettyPrint(printData, spacing + " ");
575 } else {
576 console.log(spacing + " *");
577 }
578 };
579
580 /**
581 * Created by Hanwen on 01.07.2014.
582 */
583
584 /**
585 * Constructor
586 * @param {Object} [options] config map
587 * @param {Number} options.minLength the shortest length of the fragment.
588 * @param {Number} options.minOccurrence the minimum occurrence of the fragment .
589 * @param {Boolean} options.debug whether show the console message and set the minLength and minOccurrence.
590 * @constructor
591 */
592 function SuffixTrie(options) {
593 this.options = options || {};
594
595 this.structure = {};
596 this.debug = this.options.debug === undefined ? false : options.debug;
597
598
599 this.minLENGTH = this.options.minLength === undefined ? 3 : options.minLengt h;
600 this.minOccurrence = this.options.minOccurrence === undefined ? 2 : options. minOccurrence;
601 this.isByLength = this.options.byLength === undefined ? false : options.byLe ngth;
602
603 //this.options.limit is a number
604 this.save = [];
605 this.array = null;
606 this.labelArray = null;
607 this.fragmentsArray = null;
608 this.fragmentTrie = {};
609 this.rebuildArray;
610 //default test environment
611
612 }
613
614 //help function to clear the trie
615 SuffixTrie.prototype.refresh = function(){
616 this.structure = {};
617 this.save = [];
618 this.array = null;
619 this.labelArray = null;
620 this.fragmentsArray = null;
621 this.fragmentTrie = {};
622 this.rebuildArray;
623 };
624
625
626 //help function for quick weight
627 SuffixTrie.prototype.weigh = function(){
628 if(this.isByLength){
629 return this.weighByMax()
630 }else{
631 return this.weighByAverage()
632 }
633 }
634
635 // ========================================================================== ============================================
636 // ==================================================== Public Functions ==== ============================================
637 // ========================================================================== ============================================
638 /**
639 * add fragment string to the trie
640 * @param array {Array} adding word
641 */
642 SuffixTrie.prototype.build = function (array) {
643 var debug = this.debug;
644 this.buildLabelTrie(array);
645 this.optimize(this.structure);
646 if(debug) console.log('after optimization our label trie of array length ' + this.array.length + ' is ', this.structure);
647
648 this.listLabel();
649 if(debug) console.log('get the compressed label array (without duplicate fra gments ) of array length ' + this.array.length + ' is ', this.labelArray);
650 if(debug) console.log('and fragments array of length ' + this.fragmentsArray .length + ' is ', this.fragmentsArray);
651
652 this.clearRedundantFragment();
653 if(debug) console.log('get the cleared label array (without duplicate fragme nts ) of array length ' + this.array.length + ' is ', this.labelArray);
654 if(debug) console.log('and cleared fragments array of length ' + this.fragme ntsArray.length + ' is ', this.fragmentsArray);
655
656 this.rebuild();
657 if(debug) console.log('rebuild ended' , this.rebuildArray);
658 };
659
660 SuffixTrie.prototype.buildLabelTrie = function (array) {
661 this.array = array;
662 var root = this.structure;
663 var LENGTH = this.minLENGTH;
664
665 array.forEach(function (word, index) {
666 var first = true;
667 //used to store the loop element, for connect the suffixes, e.g. 'foob' next is 'oob', by this connection, we get all the substrings.
668 var last = {};
669
670 //loop every suffix in one word, e.g. 'oss' in 'boss'
671 for (var i = 0, l = word.length; i <= l - LENGTH; i++) {
672 var cur = root;
673 var suffix = word.substring(i);
674 var letters = suffix.split("");
675
676 //loop every letter in the suffix, e.g. 'o' in 'oss'
677 for (var j = 0; j < letters.length; j++) {
678 var letter = letters[j], pos = cur[ letter ];
679
680 if (j === letters.length - 1) {
681 if (pos == null) {
682 //create new node and add the information.
683 cur[letter] = {source: [index], listed: false};
684 } else if (pos.hasOwnProperty('source')) {
685 //just add occurrence
686 pos["source"].push(index);
687 } else {
688 //node already existed, add information.
689 cur[letter]['source'] = [index];
690 cur[letter]['listed'] = false;
691 }
692 cur = cur[letter];
693
694 if (!first) {
695 last['next'] = suffix;
696 } else {
697 first = false;
698 }
699
700 last = cur;
701 } else if (pos == null) {
702 //create node
703 cur = cur[letter] = {};
704 } else {
705 //no creation, loop to next node
706 cur = cur[ letter ];
707 }
708 }
709 }
710 });
711
712 };
713
714 /**
715 * add the origin for the split node and trunk node && integrate prefix in each branch
716 * @param {Object} root the start node
717 * @param {Number} [rootLevel] the start level of the algorithm, should be -1
718 * @returns {Array} array with the origin of the word
719 *
720 */
721 SuffixTrie.prototype.optimize = function (root, rootLevel) {
722 var occurrence = this.minOccurrence;
723 //self origin indexes
724 var self_save = [];
725 rootLevel = rootLevel === undefined ? -1 : rootLevel;
726 rootLevel++;
727 //whether the word is long enough. ignored the short part.
728 var is_allowed = rootLevel >= this.minLENGTH;
729
730 for (var child in root) {
731
732 if (root.hasOwnProperty(child)) { //&& child !== 'next' && child !== 'le vel'
733 //loop to new child and combine the index information
734 if (child.length === 1) {
735
736 // returned indexes from children (iterate from the leaf)
737 var children_save = this.optimize(root[child], rootLevel);
738 if (is_allowed) {
739 self_save = self_save.concat(children_save);
740 }
741 } else if (child === 'source' && is_allowed) {
742 self_save = self_save.concat(root['source']);
743 }
744 }
745 }
746
747 // this part is to test if the fragment fulfil the occurrence requirement, d elete if not, which reduce time significantly.
748 self_save = uniqueArray(self_save);
749 var isEnoughOccurred = self_save.length >= occurrence;
750 is_allowed = is_allowed && isEnoughOccurred;
751
752 if (is_allowed) {
753 //leaf will at least has property of 'listed' and 'next'
754 var is_SoleNode = Object.keys(root).length === 1;
755 if (!is_SoleNode) {
756 //this is a split node or a flaged node, add information include sou rce here
757 root['source'] = self_save; //update it
758 root['level'] = rootLevel;
759 root['listed'] = false; //indicate this node should be check
760 root['weight'] = self_save.length * rootLevel;
761 self_save = []; //clear the array: this is important if we don't want the node with former indexes!
762 } else {
763 //this is just a sole node with one child, do nothing
764 }
765 }
766 else {
767 //it is lower than the request level , or it is a leaf node, so not calc ulate here
768 delete root['source'];
769 }
770
771 return self_save;
772 };
773
774 /**
775 * list the repeat part with certain order && integrate suffix in 'next' branch
776 * @param {Number} [start] start point in the array [0 - array.length]
777 */
778 SuffixTrie.prototype.listLabel = function (start) {
779 var array = this.array;
780 var root = this.structure;
781 var label_array = [];
782 var fragments_array = [];
783 var length = this.minLENGTH;
784 var occurrence = this.minOccurrence;
785 start = start === undefined ? 1 : start;
786
787 //loop from the certain index, lead to different rebuild array.
788 for (var index = start - 1, i = 0; i < array.length; index++, index = index % (array.length), i++) {
789 var word = array[index];
790
791 var fragments = {};
792 //skip the short word, just push the empty object.
793 if (word.length < length) {
794 label_array.push(fragments);
795 continue;
796 }
797
798 findFragments(word, fragments, root, length, occurrence, false);
799
800 //accumulate the fragment to another array.
801 for (var fragment in fragments) {
802 if (fragments.hasOwnProperty(fragment)) {
803 //get all the origin label index and push them into fragment arr ay with default order
804 var fragmentsArrayIndex = fragments_array.push(fragments[fragmen t]) - 1;
805 fragments_array[fragmentsArrayIndex]['name'] = fragment;
806 }
807 }
808
809 //build another array to map each label and all of its fragments.
810 label_array.push(fragments);
811 }
812 this.labelArray = label_array;
813 this.fragmentsArray = fragments_array;
814 return label_array;
815 };
816
817 /**
818 * rebuild the label array, make sure every label has all of its own fragments
819 */
820 SuffixTrie.prototype.rebuild = function () {
821 var rebuildArray = JSON.parse(JSON.stringify(this.labelArray));//deep copy a rray
822 rebuildArray.forEach(function (object, index) {
823 for (var fragment in object) {
824 if (object.hasOwnProperty(fragment)) {
825 object[fragment]['source'].forEach(function (labelIndex) {
826 if (labelIndex > index) {
827 rebuildArray[labelIndex][fragment] = object[fragment];
828 }
829 });
830 }
831 }
832 });
833 this.rebuildArray = rebuildArray;
834 };
835
836 /**
837 * weight the fragments and order them by the product of their occurrence and le ngth,
838 * should use this function after the build and rebuild.
839 */
840 SuffixTrie.prototype.weighByAverage = function () {
841 var debug = this.debug;
842 var fragmentsArray = this.fragmentsArray;
843 var fragments_Num = this.fragmentsArray.length;
844 fragmentsArray.sort(function (f1, f2) {
845 return f2['weight'] - f1['weight'];
846 });
847 if (debug) console.log('weigh by average: result of length ' + fragments_Nu m + ' is', fragmentsArray);
848
849 if(typeof this.options.limit == "number")
850 fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num;
851
852 return fragmentsArray.slice(0, fragments_Num);
853 };
854
855 /**
856 * weight the fragments and order them by max label length.
857 */
858 SuffixTrie.prototype.weighByMax = function () {
859 var debug = this.debug;
860
861 buildFragmentTrie(this.fragmentTrie, this.fragmentsArray);
862
863 var rebuildArray = JSON.parse(JSON.stringify(this.rebuildArray));
864 // var rebuildArray = this.rebuildArray;
865 var fragmentsArray = [];
866 var fragmentsTrie = this.fragmentTrie;
867 var fragments_Num = this.fragmentsArray.length;
868 var label_Lengths = this.array.map(function (s) {
869 return s.length;
870 });
871
872 var label_Tree = new BinarySearchTree();
873 label_Lengths.forEach(function (length, index) {
874 label_Tree.insert(length, index);
875 });
876
877 if (typeof this.options.limit == "number")
878 fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num;
879
880 while (fragmentsArray.length < fragments_Num) {
881 var maxKey = label_Tree.getMaxKey();
882
883 var labels_In_Key = label_Tree.search(maxKey);
884
885 if (labels_In_Key.length === 0) {
886 label_Tree.delete(maxKey);
887 continue;
888 }
889 //choose a random labels index with the biggest length
890 var longest_Label = labels_In_Key[0];
891
892 //get all the fragments in this label
893 //in case the there is no fragments under it.
894 var fragments = Object.keys(rebuildArray[longest_Label]);
895 if (fragments.length > 0) {
896 //then find the longest fragment
897 fragments.sort(function (f1, f2) {
898 return f2.length - f1.length;
899 });
900 removeTrie(fragments[0], fragmentsTrie, fragmentsArray, label_Length s, rebuildArray, label_Tree);
901 }
902 //in case there is no this label in fragments array -- which could be po ssible
903 label_Tree.delete(maxKey, longest_Label);
904 }
905
906 this.fragmentsArray = fragmentsArray;
907 if (debug) console.log('weigh by max: result of length ' + fragmentsArray.l ength + ' is', fragmentsArray);
908
909 return fragmentsArray.slice(0, fragments_Num);
910 };
911
912 /**
913 * delete the redundant fragment, always keep the longer one.
914 */
915 SuffixTrie.prototype.clearRedundantFragment = function () {
916 var fragments = this.fragmentsArray;
917 var occurrence = this.minOccurrence;
918 var newFragmentArray = [];
919 var labelArray = this.labelArray;
920 //sort the fragments array from short to long.
921 fragments.sort(function (a, b) {
922 return a.name.length - b.name.length;
923 });
924
925 fragments.forEach(function (fragment, index) {
926 //backup fragment's occurrence.
927 var backupsource = fragment.source.slice();
928 //check if the longer contain duplicated occurrence, filter such occurre nce
929 for (var i = index + 1; i < fragments.length; i++) {
930 var longerFragment = fragments[i];
931 if (longerFragment.name.indexOf(fragment.name) !== -1) {
932 fragment.source = fragment.source.filter(function (i) {
933 return this.indexOf(i) === -1;
934 }, longerFragment.source);
935 }
936 }
937
938 //build the new fragment array with no duplication
939 if (fragment.source.length >= occurrence) {
940 newFragmentArray.push(fragment);
941 }
942 else {
943 if (backupsource.length !== fragment.source.length) {
944 deleteFragmentInLabelArray(backupsource, fragment.name, labelArr ay);
945 }
946 }
947 });
948 this.fragmentsArray = newFragmentArray;
949 };
950
951 // ========================================================================== ============================================
952 // ==================================================== Helper Functions ==== ============================================
953 // ========================================================================== ============================================
954
955 /**
956 * Most Important Helper function
957 * help function to find the all the fragment of one word from the root of the t ree, and finally link to another suffix
958 * @param word searching objects
959 * @param fragments an object to store the sources of the common fragments in th e path to find the word
960 * @param root start point
961 * @param length minimum length requirement
962 * @param occurrence minimum occurrence requirement
963 * @param [iterate] {Boolean} whether it is the complete word or suffix
964 */
965 function findFragments(word, fragments, root, length, occurrence, iterate) {
966
967 var cur = root;
968 var letters = word.split("");
969 //loop every letter in the suffix, e.g. 'o' in 'oss'
970 for (var j = 0; j < letters.length; j++) {
971 var letter = letters[j], pos = cur[ letter ];
972 if (j + 1 >= length) {
973 var fragment = word.substring(0, j + 1);
974
975 if (pos.hasOwnProperty('listed')) {
976 if (!pos['listed']) {
977 //if it is a node fulfil our requirement
978 if (pos.hasOwnProperty('source')) {
979 fragments[fragment] = {};
980 fragments[fragment]['source'] = pos.source;
981 fragments[fragment]['weight'] = pos['weight'];
982
983 //debug whether we include all the situation.
984 if (pos['weight'] == null) {
985 console.warn('escape case! fragment does not has wei ght property, node is ', fragments[fragment]);
986 }
987 if (fragments.hasOwnProperty(fragment)) {
988 //if this is not the full word (but a suffix) we che ck the duplication
989 if (iterate) {
990 for (var property in fragments) {
991 if (property !== fragment && fragments.hasOw nProperty(property) && fragments.hasOwnProperty(fragment)) {
992 if (property.indexOf(fragment) !== -1) {
993 fragments[fragment]['source'] = frag ments[fragment]['source'].filter(function (i) {
994 return this.indexOf(i) < 0
995 }, fragments[property].source);
996 }
997 }
998 }
999
1000 if (fragments[fragment]['source'].length < occur rence) {
1001 delete fragments[fragment];
1002 }
1003 }
1004 }
1005 pos['listed'] = true;
1006 }
1007 //iterate to its suffix from root
1008 if (pos.hasOwnProperty('next')) {
1009 findFragments(pos['next'], fragments, root, length, occu rrence, true);
1010 }
1011 }
1012 }
1013 }
1014 cur = pos;
1015 }
1016 }
1017
1018 /**
1019 * A helper function to delete the reference in Label Array
1020 * @param {Array} source fragment.source
1021 * @param {String} name fragment.name
1022 * @param labelArray LabelArray
1023 */
1024 function deleteFragmentInLabelArray(source, name, labelArray) {
1025 var label = labelArray[source[0]];
1026 if (label.hasOwnProperty(name)) {
1027 delete label[name];
1028 }
1029 }
1030
1031 /**
1032 * Helper function for build a dictionary trie for searching the fragment.
1033 * @param trie the fragment trie will be build in this variable.
1034 * @param array the fragment array used for building trie.
1035 */
1036 function buildFragmentTrie(trie, array) {
1037 for (var i = 0; i < array.length; i++) {
1038
1039 var word = array[i]['name'], letters = word.split(""), cur = trie;
1040
1041 // Loop through the letters
1042 for (var j = 0; j < letters.length; j++) {
1043 var letter = letters[j];
1044
1045 if (!cur.hasOwnProperty(letter)) {
1046 cur[ letter ] = {};
1047 }
1048 cur = cur[letter];
1049 if (j === letters.length - 1) {
1050 cur['source'] = array[i]['source'];
1051 cur['name'] = array[i]['name'];
1052 cur['weight'] = array[i]['weight'];
1053 cur['list'] = false;
1054 }
1055
1056 }
1057 }
1058 }
1059
1060 /**
1061 * Helper function to remove the fragment from the trie and list it in the resul t array (fragmentsArray).
1062 * @param word the word to be searched for.
1063 * @param cur the node where the search start.
1064 * @param array the array to store the result fragment
1065 * @param labelLengthArray the array map to the length of the label
1066 * @param rebuildArray this.rebuildArray
1067 * @param label_trie the binary search trie where store all the labels with thei r length.
1068 * @returns {Number|Boolean} if find return the length of fragment, else return false.
1069 */
1070 function removeTrie(word, cur, array, labelLengthArray, rebuildArray, label_trie ) {
1071 for (var node in cur) {
1072
1073 if (cur.hasOwnProperty(node) && node.length === 1) {
1074
1075 if (word.indexOf(node) === 0) {
1076 //if this is the leaf of the trie
1077 if (word.length === 1) {
1078 if (cur[node].hasOwnProperty('list')) {
1079 var fragment = {};
1080 fragment['source'] = cur[node]['source'];
1081 var fragmentName = cur[node]['name'];
1082 fragment['name'] = fragmentName;
1083 fragment['weight'] = cur[node]['weight'];
1084 delete cur[node]['list'];
1085 //store it into our result
1086 array.push(fragment);
1087
1088 //for other node who has same fragment
1089 cur[node]['source'].forEach(function (indexLabel) {
1090 //delete the fragment reference in the label
1091 delete rebuildArray[indexLabel][fragmentName];
1092 //update the label length in the trie
1093 var labelLength = labelLengthArray[indexLabel];
1094 label_trie.delete(labelLength, indexLabel);
1095 labelLength -= fragmentName.length;
1096 label_trie.insert(labelLength, indexLabel);
1097 });
1098 }
1099 } else {
1100 removeTrie(word.slice(1), cur[node], array, labelLengthArray , rebuildArray, label_trie);
1101 }
1102 }
1103
1104 }
1105 }
1106 }
1107
1108 /**
1109 * Helper function to remove the repeat elements in an array
1110 * @param {Array} array
1111 * @returns {Array} the result array without duplicate elments
1112 */
1113 function uniqueArray(array) {
1114 var a = array.concat();
1115 for (var i = 0; i < a.length; ++i) {
1116 for (var j = i + 1; j < a.length; ++j) {
1117 if (a[i] === a[j])
1118 a.splice(j--, 1);
1119 }
1120 }
1121 return a;
1122 }
1123
1124 window.Substrings = SuffixTrie;
1125
1126 })();
OLDNEW
« no previous file with comments | « appengine/swarming/elements/third_party/README.md ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698