Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 (function(){ | |
|
jcgregorio
2016/10/03 15:15:27
What is the license for these libraries?
Why is t
| |
| 2 | |
| 3 var customUtils = {}; | |
| 4 /** | |
| 5 * Return an array with the numbers from 0 to n-1, in a random order | |
| 6 */ | |
| 7 function getRandomArray (n) { | |
| 8 var res, next; | |
| 9 | |
| 10 if (n === 0) { return []; } | |
| 11 if (n === 1) { return [0]; } | |
| 12 | |
| 13 res = getRandomArray(n - 1); | |
| 14 next = Math.floor(Math.random() * n); | |
| 15 res.splice(next, 0, n - 1); // Add n-1 at a random position in the array | |
| 16 | |
| 17 return res; | |
| 18 }; | |
| 19 customUtils.getRandomArray = getRandomArray; | |
| 20 | |
| 21 | |
| 22 /* | |
| 23 * Default compareKeys function will work for numbers, strings and dates | |
| 24 */ | |
| 25 function defaultCompareKeysFunction (a, b) { | |
| 26 if (a < b) { return -1; } | |
| 27 if (a > b) { return 1; } | |
| 28 if (a === b) { return 0; } | |
| 29 | |
| 30 throw { message: "Couldn't compare elements", a: a, b: b }; | |
| 31 } | |
| 32 customUtils.defaultCompareKeysFunction = defaultCompareKeysFunction; | |
| 33 | |
| 34 | |
| 35 /** | |
| 36 * Check whether two values are equal (used in non-unique deletion) | |
| 37 */ | |
| 38 function defaultCheckValueEquality (a, b) { | |
| 39 return a === b; | |
| 40 } | |
| 41 customUtils.defaultCheckValueEquality = defaultCheckValueEquality; | |
| 42 | |
| 43 /** | |
| 44 * Simple binary search tree | |
| 45 */ | |
| 46 | |
| 47 | |
| 48 /** | |
| 49 * Constructor | |
| 50 * @param {Object} options Optional | |
| 51 * @param {Boolean} options.unique Whether to enforce a 'unique' constraint on the key or not | |
| 52 * @param {Key} options.key Initialize this BST's key with key | |
| 53 * @param {Value} options.value Initialize this BST's data with [value] | |
| 54 * @param {Function} options.compareKeys Initialize this BST's compareKeys | |
| 55 */ | |
| 56 function BinarySearchTree (options) { | |
| 57 options = options || {}; | |
| 58 | |
| 59 this.left = null; | |
| 60 this.right = null; | |
| 61 this.parent = options.parent !== undefined ? options.parent : null; | |
| 62 if (options.hasOwnProperty('key')) { this.key = options.key; } | |
| 63 this.data = options.hasOwnProperty('value') ? [options.value] : []; | |
| 64 this.unique = options.unique || false; | |
| 65 | |
| 66 this.compareKeys = options.compareKeys || customUtils.defaultCompareKeysFuncti on; | |
| 67 this.checkValueEquality = options.checkValueEquality || customUtils.defaultChe ckValueEquality; | |
| 68 } | |
| 69 | |
| 70 | |
| 71 // ================================ | |
| 72 // Methods used to test the tree | |
| 73 // ================================ | |
| 74 | |
| 75 | |
| 76 /** | |
| 77 * Get the descendant with max key | |
| 78 */ | |
| 79 BinarySearchTree.prototype.getMaxKeyDescendant = function () { | |
| 80 if (this.right) { | |
| 81 return this.right.getMaxKeyDescendant(); | |
| 82 } else { | |
| 83 return this; | |
| 84 } | |
| 85 }; | |
| 86 | |
| 87 | |
| 88 /** | |
| 89 * Get the maximum key | |
| 90 */ | |
| 91 BinarySearchTree.prototype.getMaxKey = function () { | |
| 92 return this.getMaxKeyDescendant().key; | |
| 93 }; | |
| 94 | |
| 95 | |
| 96 /** | |
| 97 * Get the descendant with min key | |
| 98 */ | |
| 99 BinarySearchTree.prototype.getMinKeyDescendant = function () { | |
| 100 if (this.left) { | |
| 101 return this.left.getMinKeyDescendant() | |
| 102 } else { | |
| 103 return this; | |
| 104 } | |
| 105 }; | |
| 106 | |
| 107 | |
| 108 /** | |
| 109 * Get the minimum key | |
| 110 */ | |
| 111 BinarySearchTree.prototype.getMinKey = function () { | |
| 112 return this.getMinKeyDescendant().key; | |
| 113 }; | |
| 114 | |
| 115 | |
| 116 /** | |
| 117 * Check that all nodes (incl. leaves) fullfil condition given by fn | |
| 118 * test is a function passed every (key, data) and which throws if the condition is not met | |
| 119 */ | |
| 120 BinarySearchTree.prototype.checkAllNodesFullfillCondition = function (test) { | |
| 121 if (!this.hasOwnProperty('key')) { return; } | |
| 122 | |
| 123 test(this.key, this.data); | |
| 124 if (this.left) { this.left.checkAllNodesFullfillCondition(test); } | |
| 125 if (this.right) { this.right.checkAllNodesFullfillCondition(test); } | |
| 126 }; | |
| 127 | |
| 128 | |
| 129 /** | |
| 130 * Check that the core BST properties on node ordering are verified | |
| 131 * Throw if they aren't | |
| 132 */ | |
| 133 BinarySearchTree.prototype.checkNodeOrdering = function () { | |
| 134 var self = this; | |
| 135 | |
| 136 if (!this.hasOwnProperty('key')) { return; } | |
| 137 | |
| 138 if (this.left) { | |
| 139 this.left.checkAllNodesFullfillCondition(function (k) { | |
| 140 if (self.compareKeys(k, self.key) >= 0) { | |
| 141 throw 'Tree with root ' + self.key + ' is not a binary search tree'; | |
| 142 } | |
| 143 }); | |
| 144 this.left.checkNodeOrdering(); | |
| 145 } | |
| 146 | |
| 147 if (this.right) { | |
| 148 this.right.checkAllNodesFullfillCondition(function (k) { | |
| 149 if (self.compareKeys(k, self.key) <= 0) { | |
| 150 throw 'Tree with root ' + self.key + ' is not a binary search tree'; | |
| 151 } | |
| 152 }); | |
| 153 this.right.checkNodeOrdering(); | |
| 154 } | |
| 155 }; | |
| 156 | |
| 157 | |
| 158 /** | |
| 159 * Check that all pointers are coherent in this tree | |
| 160 */ | |
| 161 BinarySearchTree.prototype.checkInternalPointers = function () { | |
| 162 if (this.left) { | |
| 163 if (this.left.parent !== this) { throw 'Parent pointer broken for key ' + th is.key; } | |
| 164 this.left.checkInternalPointers(); | |
| 165 } | |
| 166 | |
| 167 if (this.right) { | |
| 168 if (this.right.parent !== this) { throw 'Parent pointer broken for key ' + t his.key; } | |
| 169 this.right.checkInternalPointers(); | |
| 170 } | |
| 171 }; | |
| 172 | |
| 173 | |
| 174 /** | |
| 175 * Check that a tree is a BST as defined here (node ordering and pointer referen ces) | |
| 176 */ | |
| 177 BinarySearchTree.prototype.checkIsBST = function () { | |
| 178 this.checkNodeOrdering(); | |
| 179 this.checkInternalPointers(); | |
| 180 if (this.parent) { throw "The root shouldn't have a parent"; } | |
| 181 }; | |
| 182 | |
| 183 | |
| 184 /** | |
| 185 * Get number of keys inserted | |
| 186 */ | |
| 187 BinarySearchTree.prototype.getNumberOfKeys = function () { | |
| 188 var res; | |
| 189 | |
| 190 if (!this.hasOwnProperty('key')) { return 0; } | |
| 191 | |
| 192 res = 1; | |
| 193 if (this.left) { res += this.left.getNumberOfKeys(); } | |
| 194 if (this.right) { res += this.right.getNumberOfKeys(); } | |
| 195 | |
| 196 return res; | |
| 197 }; | |
| 198 | |
| 199 | |
| 200 | |
| 201 // ============================================ | |
| 202 // Methods used to actually work on the tree | |
| 203 // ============================================ | |
| 204 | |
| 205 /** | |
| 206 * Create a BST similar (i.e. same options except for key and value) to the curr ent one | |
| 207 * Use the same constructor (i.e. BinarySearchTree, AVLTree etc) | |
| 208 * @param {Object} options see constructor | |
| 209 */ | |
| 210 BinarySearchTree.prototype.createSimilar = function (options) { | |
| 211 options = options || {}; | |
| 212 options.unique = this.unique; | |
| 213 options.compareKeys = this.compareKeys; | |
| 214 options.checkValueEquality = this.checkValueEquality; | |
| 215 | |
| 216 return new this.constructor(options); | |
| 217 }; | |
| 218 | |
| 219 | |
| 220 /** | |
| 221 * Create the left child of this BST and return it | |
| 222 */ | |
| 223 BinarySearchTree.prototype.createLeftChild = function (options) { | |
| 224 var leftChild = this.createSimilar(options); | |
| 225 leftChild.parent = this; | |
| 226 this.left = leftChild; | |
| 227 | |
| 228 return leftChild; | |
| 229 }; | |
| 230 | |
| 231 | |
| 232 /** | |
| 233 * Create the right child of this BST and return it | |
| 234 */ | |
| 235 BinarySearchTree.prototype.createRightChild = function (options) { | |
| 236 var rightChild = this.createSimilar(options); | |
| 237 rightChild.parent = this; | |
| 238 this.right = rightChild; | |
| 239 | |
| 240 return rightChild; | |
| 241 }; | |
| 242 | |
| 243 | |
| 244 /** | |
| 245 * Insert a new element | |
| 246 */ | |
| 247 BinarySearchTree.prototype.insert = function (key, value) { | |
| 248 // Empty tree, insert as root | |
| 249 if (!this.hasOwnProperty('key')) { | |
| 250 this.key = key; | |
| 251 this.data.push(value); | |
| 252 return; | |
| 253 } | |
| 254 | |
| 255 // Same key as root | |
| 256 if (this.compareKeys(this.key, key) === 0) { | |
| 257 if (this.unique) { | |
| 258 throw { message: "Can't insert key " + key + ", it violates the unique con straint" | |
| 259 , key: key | |
| 260 , errorType: 'uniqueViolated' | |
| 261 }; | |
| 262 } else { | |
| 263 this.data.push(value); | |
| 264 } | |
| 265 return; | |
| 266 } | |
| 267 | |
| 268 if (this.compareKeys(key, this.key) < 0) { | |
| 269 // Insert in left subtree | |
| 270 if (this.left) { | |
| 271 this.left.insert(key, value); | |
| 272 } else { | |
| 273 this.createLeftChild({ key: key, value: value }); | |
| 274 } | |
| 275 } else { | |
| 276 // Insert in right subtree | |
| 277 if (this.right) { | |
| 278 this.right.insert(key, value); | |
| 279 } else { | |
| 280 this.createRightChild({ key: key, value: value }); | |
| 281 } | |
| 282 } | |
| 283 }; | |
| 284 | |
| 285 | |
| 286 /** | |
| 287 * Search for all data corresponding to a key | |
| 288 */ | |
| 289 BinarySearchTree.prototype.search = function (key) { | |
| 290 if (!this.hasOwnProperty('key')) { return []; } | |
| 291 | |
| 292 if (this.compareKeys(this.key, key) === 0) { return this.data; } | |
| 293 | |
| 294 if (this.compareKeys(key, this.key) < 0) { | |
| 295 if (this.left) { | |
| 296 return this.left.search(key); | |
| 297 } else { | |
| 298 return []; | |
| 299 } | |
| 300 } else { | |
| 301 if (this.right) { | |
| 302 return this.right.search(key); | |
| 303 } else { | |
| 304 return []; | |
| 305 } | |
| 306 } | |
| 307 }; | |
| 308 | |
| 309 | |
| 310 /** | |
| 311 * Return a function that tells whether a given key matches a lower bound | |
| 312 */ | |
| 313 BinarySearchTree.prototype.getLowerBoundMatcher = function (query) { | |
| 314 var self = this; | |
| 315 | |
| 316 // No lower bound | |
| 317 if (!query.hasOwnProperty('$gt') && !query.hasOwnProperty('$gte')) { | |
| 318 return function () { return true; }; | |
| 319 } | |
| 320 | |
| 321 if (query.hasOwnProperty('$gt') && query.hasOwnProperty('$gte')) { | |
| 322 if (self.compareKeys(query.$gte, query.$gt) === 0) { | |
| 323 return function (key) { return self.compareKeys(key, query.$gt) > 0; }; | |
| 324 } | |
| 325 | |
| 326 if (self.compareKeys(query.$gte, query.$gt) > 0) { | |
| 327 return function (key) { return self.compareKeys(key, query.$gte) >= 0; }; | |
| 328 } else { | |
| 329 return function (key) { return self.compareKeys(key, query.$gt) > 0; }; | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 if (query.hasOwnProperty('$gt')) { | |
| 334 return function (key) { return self.compareKeys(key, query.$gt) > 0; }; | |
| 335 } else { | |
| 336 return function (key) { return self.compareKeys(key, query.$gte) >= 0; }; | |
| 337 } | |
| 338 }; | |
| 339 | |
| 340 | |
| 341 /** | |
| 342 * Return a function that tells whether a given key matches an upper bound | |
| 343 */ | |
| 344 BinarySearchTree.prototype.getUpperBoundMatcher = function (query) { | |
| 345 var self = this; | |
| 346 | |
| 347 // No lower bound | |
| 348 if (!query.hasOwnProperty('$lt') && !query.hasOwnProperty('$lte')) { | |
| 349 return function () { return true; }; | |
| 350 } | |
| 351 | |
| 352 if (query.hasOwnProperty('$lt') && query.hasOwnProperty('$lte')) { | |
| 353 if (self.compareKeys(query.$lte, query.$lt) === 0) { | |
| 354 return function (key) { return self.compareKeys(key, query.$lt) < 0; }; | |
| 355 } | |
| 356 | |
| 357 if (self.compareKeys(query.$lte, query.$lt) < 0) { | |
| 358 return function (key) { return self.compareKeys(key, query.$lte) <= 0; }; | |
| 359 } else { | |
| 360 return function (key) { return self.compareKeys(key, query.$lt) < 0; }; | |
| 361 } | |
| 362 } | |
| 363 | |
| 364 if (query.hasOwnProperty('$lt')) { | |
| 365 return function (key) { return self.compareKeys(key, query.$lt) < 0; }; | |
| 366 } else { | |
| 367 return function (key) { return self.compareKeys(key, query.$lte) <= 0; }; | |
| 368 } | |
| 369 }; | |
| 370 | |
| 371 | |
| 372 // Append all elements in toAppend to array | |
| 373 function append (array, toAppend) { | |
| 374 var i; | |
| 375 | |
| 376 for (i = 0; i < toAppend.length; i += 1) { | |
| 377 array.push(toAppend[i]); | |
| 378 } | |
| 379 } | |
| 380 | |
| 381 | |
| 382 /** | |
| 383 * Get all data for a key between bounds | |
| 384 * Return it in key order | |
| 385 * @param {Object} query Mongo-style query where keys are $lt, $lte, $gt or $gte (other keys are not considered) | |
| 386 * @param {Functions} lbm/ubm matching functions calculated at the first recursi ve step | |
| 387 */ | |
| 388 BinarySearchTree.prototype.betweenBounds = function (query, lbm, ubm) { | |
| 389 var res = []; | |
| 390 | |
| 391 if (!this.hasOwnProperty('key')) { return []; } // Empty tree | |
| 392 | |
| 393 lbm = lbm || this.getLowerBoundMatcher(query); | |
| 394 ubm = ubm || this.getUpperBoundMatcher(query); | |
| 395 | |
| 396 if (lbm(this.key) && this.left) { append(res, this.left.betweenBounds(query, l bm, ubm)); } | |
| 397 if (lbm(this.key) && ubm(this.key)) { append(res, this.data); } | |
| 398 if (ubm(this.key) && this.right) { append(res, this.right.betweenBounds(query, lbm, ubm)); } | |
| 399 | |
| 400 return res; | |
| 401 }; | |
| 402 | |
| 403 | |
| 404 /** | |
| 405 * Delete the current node if it is a leaf | |
| 406 * Return true if it was deleted | |
| 407 */ | |
| 408 BinarySearchTree.prototype.deleteIfLeaf = function () { | |
| 409 if (this.left || this.right) { return false; } | |
| 410 | |
| 411 // The leaf is itself a root | |
| 412 if (!this.parent) { | |
| 413 delete this.key; | |
| 414 this.data = []; | |
| 415 return true; | |
| 416 } | |
| 417 | |
| 418 if (this.parent.left === this) { | |
| 419 this.parent.left = null; | |
| 420 } else { | |
| 421 this.parent.right = null; | |
| 422 } | |
| 423 | |
| 424 return true; | |
| 425 }; | |
| 426 | |
| 427 | |
| 428 /** | |
| 429 * Delete the current node if it has only one child | |
| 430 * Return true if it was deleted | |
| 431 */ | |
| 432 BinarySearchTree.prototype.deleteIfOnlyOneChild = function () { | |
| 433 var child; | |
| 434 | |
| 435 if (this.left && !this.right) { child = this.left; } | |
| 436 if (!this.left && this.right) { child = this.right; } | |
| 437 if (!child) { return false; } | |
| 438 | |
| 439 // Root | |
| 440 if (!this.parent) { | |
| 441 this.key = child.key; | |
| 442 this.data = child.data; | |
| 443 | |
| 444 this.left = null; | |
| 445 if (child.left) { | |
| 446 this.left = child.left; | |
| 447 child.left.parent = this; | |
| 448 } | |
| 449 | |
| 450 this.right = null; | |
| 451 if (child.right) { | |
| 452 this.right = child.right; | |
| 453 child.right.parent = this; | |
| 454 } | |
| 455 | |
| 456 return true; | |
| 457 } | |
| 458 | |
| 459 if (this.parent.left === this) { | |
| 460 this.parent.left = child; | |
| 461 child.parent = this.parent; | |
| 462 } else { | |
| 463 this.parent.right = child; | |
| 464 child.parent = this.parent; | |
| 465 } | |
| 466 | |
| 467 return true; | |
| 468 }; | |
| 469 | |
| 470 | |
| 471 /** | |
| 472 * Delete a key or just a value | |
| 473 * @param {Key} key | |
| 474 * @param {Value} value Optional. If not set, the whole key is deleted. If set, only this value is deleted | |
| 475 */ | |
| 476 BinarySearchTree.prototype.delete = function (key, value) { | |
| 477 var newData = [], replaceWith | |
| 478 , self = this | |
| 479 ; | |
| 480 | |
| 481 if (!this.hasOwnProperty('key')) { return; } | |
| 482 | |
| 483 if (this.compareKeys(key, this.key) < 0) { | |
| 484 if (this.left) { this.left.delete(key, value); } | |
| 485 return; | |
| 486 } | |
| 487 | |
| 488 if (this.compareKeys(key, this.key) > 0) { | |
| 489 if (this.right) { this.right.delete(key, value); } | |
| 490 return; | |
| 491 } | |
| 492 | |
| 493 if (!this.compareKeys(key, this.key) === 0) { return; } | |
| 494 | |
| 495 // Delete only a value | |
| 496 if (this.data.length > 1 && value !== undefined) { | |
| 497 this.data.forEach(function (d) { | |
| 498 if (!self.checkValueEquality(d, value)) { newData.push(d); } | |
| 499 }); | |
| 500 self.data = newData; | |
| 501 return; | |
| 502 } | |
| 503 | |
| 504 // Delete the whole node | |
| 505 if (this.deleteIfLeaf()) { | |
| 506 return; | |
| 507 } | |
| 508 if (this.deleteIfOnlyOneChild()) { | |
| 509 return; | |
| 510 } | |
| 511 | |
| 512 // We are in the case where the node to delete has two children | |
| 513 if (Math.random() >= 0.5) { // Randomize replacement to avoid unbalancing th e tree too much | |
| 514 // Use the in-order predecessor | |
| 515 replaceWith = this.left.getMaxKeyDescendant(); | |
| 516 | |
| 517 this.key = replaceWith.key; | |
| 518 this.data = replaceWith.data; | |
| 519 | |
| 520 if (this === replaceWith.parent) { // Special case | |
| 521 this.left = replaceWith.left; | |
| 522 if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; } | |
| 523 } else { | |
| 524 replaceWith.parent.right = replaceWith.left; | |
| 525 if (replaceWith.left) { replaceWith.left.parent = replaceWith.parent; } | |
| 526 } | |
| 527 } else { | |
| 528 // Use the in-order successor | |
| 529 replaceWith = this.right.getMinKeyDescendant(); | |
| 530 | |
| 531 this.key = replaceWith.key; | |
| 532 this.data = replaceWith.data; | |
| 533 | |
| 534 if (this === replaceWith.parent) { // Special case | |
| 535 this.right = replaceWith.right; | |
| 536 if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; } | |
| 537 } else { | |
| 538 replaceWith.parent.left = replaceWith.right; | |
| 539 if (replaceWith.right) { replaceWith.right.parent = replaceWith.parent; } | |
| 540 } | |
| 541 } | |
| 542 }; | |
| 543 | |
| 544 | |
| 545 /** | |
| 546 * Execute a function on every node of the tree, in key order | |
| 547 * @param {Function} fn Signature: node. Most useful will probably be node.key a nd node.data | |
| 548 */ | |
| 549 BinarySearchTree.prototype.executeOnEveryNode = function (fn) { | |
| 550 if (this.left) { this.left.executeOnEveryNode(fn); } | |
| 551 fn(this); | |
| 552 if (this.right) { this.right.executeOnEveryNode(fn); } | |
| 553 }; | |
| 554 | |
| 555 | |
| 556 /** | |
| 557 * Pretty print a tree | |
| 558 * @param {Boolean} printData To print the nodes' data along with the key | |
| 559 */ | |
| 560 BinarySearchTree.prototype.prettyPrint = function (printData, spacing) { | |
| 561 spacing = spacing || ""; | |
| 562 | |
| 563 console.log(spacing + "* " + this.key); | |
| 564 if (printData) { console.log(spacing + "* " + this.data); } | |
| 565 | |
| 566 if (!this.left && !this.right) { return; } | |
| 567 | |
| 568 if (this.left) { | |
| 569 this.left.prettyPrint(printData, spacing + " "); | |
| 570 } else { | |
| 571 console.log(spacing + " *"); | |
| 572 } | |
| 573 if (this.right) { | |
| 574 this.right.prettyPrint(printData, spacing + " "); | |
| 575 } else { | |
| 576 console.log(spacing + " *"); | |
| 577 } | |
| 578 }; | |
| 579 | |
| 580 /** | |
| 581 * Created by Hanwen on 01.07.2014. | |
| 582 */ | |
| 583 | |
| 584 /** | |
| 585 * Constructor | |
| 586 * @param {Object} [options] config map | |
| 587 * @param {Number} options.minLength the shortest length of the fragment. | |
| 588 * @param {Number} options.minOccurrence the minimum occurrence of the fragment . | |
| 589 * @param {Boolean} options.debug whether show the console message and set the minLength and minOccurrence. | |
| 590 * @constructor | |
| 591 */ | |
| 592 function SuffixTrie(options) { | |
| 593 this.options = options || {}; | |
| 594 | |
| 595 this.structure = {}; | |
| 596 this.debug = this.options.debug === undefined ? false : options.debug; | |
| 597 | |
| 598 | |
| 599 this.minLENGTH = this.options.minLength === undefined ? 3 : options.minLengt h; | |
| 600 this.minOccurrence = this.options.minOccurrence === undefined ? 2 : options. minOccurrence; | |
| 601 this.isByLength = this.options.byLength === undefined ? false : options.byLe ngth; | |
| 602 | |
| 603 //this.options.limit is a number | |
| 604 this.save = []; | |
| 605 this.array = null; | |
| 606 this.labelArray = null; | |
| 607 this.fragmentsArray = null; | |
| 608 this.fragmentTrie = {}; | |
| 609 this.rebuildArray; | |
| 610 //default test environment | |
| 611 | |
| 612 } | |
| 613 | |
| 614 //help function to clear the trie | |
| 615 SuffixTrie.prototype.refresh = function(){ | |
| 616 this.structure = {}; | |
| 617 this.save = []; | |
| 618 this.array = null; | |
| 619 this.labelArray = null; | |
| 620 this.fragmentsArray = null; | |
| 621 this.fragmentTrie = {}; | |
| 622 this.rebuildArray; | |
| 623 }; | |
| 624 | |
| 625 | |
| 626 //help function for quick weight | |
| 627 SuffixTrie.prototype.weigh = function(){ | |
| 628 if(this.isByLength){ | |
| 629 return this.weighByMax() | |
| 630 }else{ | |
| 631 return this.weighByAverage() | |
| 632 } | |
| 633 } | |
| 634 | |
| 635 // ========================================================================== ============================================ | |
| 636 // ==================================================== Public Functions ==== ============================================ | |
| 637 // ========================================================================== ============================================ | |
| 638 /** | |
| 639 * add fragment string to the trie | |
| 640 * @param array {Array} adding word | |
| 641 */ | |
| 642 SuffixTrie.prototype.build = function (array) { | |
| 643 var debug = this.debug; | |
| 644 this.buildLabelTrie(array); | |
| 645 this.optimize(this.structure); | |
| 646 if(debug) console.log('after optimization our label trie of array length ' + this.array.length + ' is ', this.structure); | |
| 647 | |
| 648 this.listLabel(); | |
| 649 if(debug) console.log('get the compressed label array (without duplicate fra gments ) of array length ' + this.array.length + ' is ', this.labelArray); | |
| 650 if(debug) console.log('and fragments array of length ' + this.fragmentsArray .length + ' is ', this.fragmentsArray); | |
| 651 | |
| 652 this.clearRedundantFragment(); | |
| 653 if(debug) console.log('get the cleared label array (without duplicate fragme nts ) of array length ' + this.array.length + ' is ', this.labelArray); | |
| 654 if(debug) console.log('and cleared fragments array of length ' + this.fragme ntsArray.length + ' is ', this.fragmentsArray); | |
| 655 | |
| 656 this.rebuild(); | |
| 657 if(debug) console.log('rebuild ended' , this.rebuildArray); | |
| 658 }; | |
| 659 | |
| 660 SuffixTrie.prototype.buildLabelTrie = function (array) { | |
| 661 this.array = array; | |
| 662 var root = this.structure; | |
| 663 var LENGTH = this.minLENGTH; | |
| 664 | |
| 665 array.forEach(function (word, index) { | |
| 666 var first = true; | |
| 667 //used to store the loop element, for connect the suffixes, e.g. 'foob' next is 'oob', by this connection, we get all the substrings. | |
| 668 var last = {}; | |
| 669 | |
| 670 //loop every suffix in one word, e.g. 'oss' in 'boss' | |
| 671 for (var i = 0, l = word.length; i <= l - LENGTH; i++) { | |
| 672 var cur = root; | |
| 673 var suffix = word.substring(i); | |
| 674 var letters = suffix.split(""); | |
| 675 | |
| 676 //loop every letter in the suffix, e.g. 'o' in 'oss' | |
| 677 for (var j = 0; j < letters.length; j++) { | |
| 678 var letter = letters[j], pos = cur[ letter ]; | |
| 679 | |
| 680 if (j === letters.length - 1) { | |
| 681 if (pos == null) { | |
| 682 //create new node and add the information. | |
| 683 cur[letter] = {source: [index], listed: false}; | |
| 684 } else if (pos.hasOwnProperty('source')) { | |
| 685 //just add occurrence | |
| 686 pos["source"].push(index); | |
| 687 } else { | |
| 688 //node already existed, add information. | |
| 689 cur[letter]['source'] = [index]; | |
| 690 cur[letter]['listed'] = false; | |
| 691 } | |
| 692 cur = cur[letter]; | |
| 693 | |
| 694 if (!first) { | |
| 695 last['next'] = suffix; | |
| 696 } else { | |
| 697 first = false; | |
| 698 } | |
| 699 | |
| 700 last = cur; | |
| 701 } else if (pos == null) { | |
| 702 //create node | |
| 703 cur = cur[letter] = {}; | |
| 704 } else { | |
| 705 //no creation, loop to next node | |
| 706 cur = cur[ letter ]; | |
| 707 } | |
| 708 } | |
| 709 } | |
| 710 }); | |
| 711 | |
| 712 }; | |
| 713 | |
| 714 /** | |
| 715 * add the origin for the split node and trunk node && integrate prefix in each branch | |
| 716 * @param {Object} root the start node | |
| 717 * @param {Number} [rootLevel] the start level of the algorithm, should be -1 | |
| 718 * @returns {Array} array with the origin of the word | |
| 719 * | |
| 720 */ | |
| 721 SuffixTrie.prototype.optimize = function (root, rootLevel) { | |
| 722 var occurrence = this.minOccurrence; | |
| 723 //self origin indexes | |
| 724 var self_save = []; | |
| 725 rootLevel = rootLevel === undefined ? -1 : rootLevel; | |
| 726 rootLevel++; | |
| 727 //whether the word is long enough. ignored the short part. | |
| 728 var is_allowed = rootLevel >= this.minLENGTH; | |
| 729 | |
| 730 for (var child in root) { | |
| 731 | |
| 732 if (root.hasOwnProperty(child)) { //&& child !== 'next' && child !== 'le vel' | |
| 733 //loop to new child and combine the index information | |
| 734 if (child.length === 1) { | |
| 735 | |
| 736 // returned indexes from children (iterate from the leaf) | |
| 737 var children_save = this.optimize(root[child], rootLevel); | |
| 738 if (is_allowed) { | |
| 739 self_save = self_save.concat(children_save); | |
| 740 } | |
| 741 } else if (child === 'source' && is_allowed) { | |
| 742 self_save = self_save.concat(root['source']); | |
| 743 } | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 // this part is to test if the fragment fulfil the occurrence requirement, d elete if not, which reduce time significantly. | |
| 748 self_save = uniqueArray(self_save); | |
| 749 var isEnoughOccurred = self_save.length >= occurrence; | |
| 750 is_allowed = is_allowed && isEnoughOccurred; | |
| 751 | |
| 752 if (is_allowed) { | |
| 753 //leaf will at least has property of 'listed' and 'next' | |
| 754 var is_SoleNode = Object.keys(root).length === 1; | |
| 755 if (!is_SoleNode) { | |
| 756 //this is a split node or a flaged node, add information include sou rce here | |
| 757 root['source'] = self_save; //update it | |
| 758 root['level'] = rootLevel; | |
| 759 root['listed'] = false; //indicate this node should be check | |
| 760 root['weight'] = self_save.length * rootLevel; | |
| 761 self_save = []; //clear the array: this is important if we don't want the node with former indexes! | |
| 762 } else { | |
| 763 //this is just a sole node with one child, do nothing | |
| 764 } | |
| 765 } | |
| 766 else { | |
| 767 //it is lower than the request level , or it is a leaf node, so not calc ulate here | |
| 768 delete root['source']; | |
| 769 } | |
| 770 | |
| 771 return self_save; | |
| 772 }; | |
| 773 | |
| 774 /** | |
| 775 * list the repeat part with certain order && integrate suffix in 'next' branch | |
| 776 * @param {Number} [start] start point in the array [0 - array.length] | |
| 777 */ | |
| 778 SuffixTrie.prototype.listLabel = function (start) { | |
| 779 var array = this.array; | |
| 780 var root = this.structure; | |
| 781 var label_array = []; | |
| 782 var fragments_array = []; | |
| 783 var length = this.minLENGTH; | |
| 784 var occurrence = this.minOccurrence; | |
| 785 start = start === undefined ? 1 : start; | |
| 786 | |
| 787 //loop from the certain index, lead to different rebuild array. | |
| 788 for (var index = start - 1, i = 0; i < array.length; index++, index = index % (array.length), i++) { | |
| 789 var word = array[index]; | |
| 790 | |
| 791 var fragments = {}; | |
| 792 //skip the short word, just push the empty object. | |
| 793 if (word.length < length) { | |
| 794 label_array.push(fragments); | |
| 795 continue; | |
| 796 } | |
| 797 | |
| 798 findFragments(word, fragments, root, length, occurrence, false); | |
| 799 | |
| 800 //accumulate the fragment to another array. | |
| 801 for (var fragment in fragments) { | |
| 802 if (fragments.hasOwnProperty(fragment)) { | |
| 803 //get all the origin label index and push them into fragment arr ay with default order | |
| 804 var fragmentsArrayIndex = fragments_array.push(fragments[fragmen t]) - 1; | |
| 805 fragments_array[fragmentsArrayIndex]['name'] = fragment; | |
| 806 } | |
| 807 } | |
| 808 | |
| 809 //build another array to map each label and all of its fragments. | |
| 810 label_array.push(fragments); | |
| 811 } | |
| 812 this.labelArray = label_array; | |
| 813 this.fragmentsArray = fragments_array; | |
| 814 return label_array; | |
| 815 }; | |
| 816 | |
| 817 /** | |
| 818 * rebuild the label array, make sure every label has all of its own fragments | |
| 819 */ | |
| 820 SuffixTrie.prototype.rebuild = function () { | |
| 821 var rebuildArray = JSON.parse(JSON.stringify(this.labelArray));//deep copy a rray | |
| 822 rebuildArray.forEach(function (object, index) { | |
| 823 for (var fragment in object) { | |
| 824 if (object.hasOwnProperty(fragment)) { | |
| 825 object[fragment]['source'].forEach(function (labelIndex) { | |
| 826 if (labelIndex > index) { | |
| 827 rebuildArray[labelIndex][fragment] = object[fragment]; | |
| 828 } | |
| 829 }); | |
| 830 } | |
| 831 } | |
| 832 }); | |
| 833 this.rebuildArray = rebuildArray; | |
| 834 }; | |
| 835 | |
| 836 /** | |
| 837 * weight the fragments and order them by the product of their occurrence and le ngth, | |
| 838 * should use this function after the build and rebuild. | |
| 839 */ | |
| 840 SuffixTrie.prototype.weighByAverage = function () { | |
| 841 var debug = this.debug; | |
| 842 var fragmentsArray = this.fragmentsArray; | |
| 843 var fragments_Num = this.fragmentsArray.length; | |
| 844 fragmentsArray.sort(function (f1, f2) { | |
| 845 return f2['weight'] - f1['weight']; | |
| 846 }); | |
| 847 if (debug) console.log('weigh by average: result of length ' + fragments_Nu m + ' is', fragmentsArray); | |
| 848 | |
| 849 if(typeof this.options.limit == "number") | |
| 850 fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num; | |
| 851 | |
| 852 return fragmentsArray.slice(0, fragments_Num); | |
| 853 }; | |
| 854 | |
| 855 /** | |
| 856 * weight the fragments and order them by max label length. | |
| 857 */ | |
| 858 SuffixTrie.prototype.weighByMax = function () { | |
| 859 var debug = this.debug; | |
| 860 | |
| 861 buildFragmentTrie(this.fragmentTrie, this.fragmentsArray); | |
| 862 | |
| 863 var rebuildArray = JSON.parse(JSON.stringify(this.rebuildArray)); | |
| 864 // var rebuildArray = this.rebuildArray; | |
| 865 var fragmentsArray = []; | |
| 866 var fragmentsTrie = this.fragmentTrie; | |
| 867 var fragments_Num = this.fragmentsArray.length; | |
| 868 var label_Lengths = this.array.map(function (s) { | |
| 869 return s.length; | |
| 870 }); | |
| 871 | |
| 872 var label_Tree = new BinarySearchTree(); | |
| 873 label_Lengths.forEach(function (length, index) { | |
| 874 label_Tree.insert(length, index); | |
| 875 }); | |
| 876 | |
| 877 if (typeof this.options.limit == "number") | |
| 878 fragments_Num = this.options.limit < fragments_Num ? this.options.limit : fragments_Num; | |
| 879 | |
| 880 while (fragmentsArray.length < fragments_Num) { | |
| 881 var maxKey = label_Tree.getMaxKey(); | |
| 882 | |
| 883 var labels_In_Key = label_Tree.search(maxKey); | |
| 884 | |
| 885 if (labels_In_Key.length === 0) { | |
| 886 label_Tree.delete(maxKey); | |
| 887 continue; | |
| 888 } | |
| 889 //choose a random labels index with the biggest length | |
| 890 var longest_Label = labels_In_Key[0]; | |
| 891 | |
| 892 //get all the fragments in this label | |
| 893 //in case the there is no fragments under it. | |
| 894 var fragments = Object.keys(rebuildArray[longest_Label]); | |
| 895 if (fragments.length > 0) { | |
| 896 //then find the longest fragment | |
| 897 fragments.sort(function (f1, f2) { | |
| 898 return f2.length - f1.length; | |
| 899 }); | |
| 900 removeTrie(fragments[0], fragmentsTrie, fragmentsArray, label_Length s, rebuildArray, label_Tree); | |
| 901 } | |
| 902 //in case there is no this label in fragments array -- which could be po ssible | |
| 903 label_Tree.delete(maxKey, longest_Label); | |
| 904 } | |
| 905 | |
| 906 this.fragmentsArray = fragmentsArray; | |
| 907 if (debug) console.log('weigh by max: result of length ' + fragmentsArray.l ength + ' is', fragmentsArray); | |
| 908 | |
| 909 return fragmentsArray.slice(0, fragments_Num); | |
| 910 }; | |
| 911 | |
| 912 /** | |
| 913 * delete the redundant fragment, always keep the longer one. | |
| 914 */ | |
| 915 SuffixTrie.prototype.clearRedundantFragment = function () { | |
| 916 var fragments = this.fragmentsArray; | |
| 917 var occurrence = this.minOccurrence; | |
| 918 var newFragmentArray = []; | |
| 919 var labelArray = this.labelArray; | |
| 920 //sort the fragments array from short to long. | |
| 921 fragments.sort(function (a, b) { | |
| 922 return a.name.length - b.name.length; | |
| 923 }); | |
| 924 | |
| 925 fragments.forEach(function (fragment, index) { | |
| 926 //backup fragment's occurrence. | |
| 927 var backupsource = fragment.source.slice(); | |
| 928 //check if the longer contain duplicated occurrence, filter such occurre nce | |
| 929 for (var i = index + 1; i < fragments.length; i++) { | |
| 930 var longerFragment = fragments[i]; | |
| 931 if (longerFragment.name.indexOf(fragment.name) !== -1) { | |
| 932 fragment.source = fragment.source.filter(function (i) { | |
| 933 return this.indexOf(i) === -1; | |
| 934 }, longerFragment.source); | |
| 935 } | |
| 936 } | |
| 937 | |
| 938 //build the new fragment array with no duplication | |
| 939 if (fragment.source.length >= occurrence) { | |
| 940 newFragmentArray.push(fragment); | |
| 941 } | |
| 942 else { | |
| 943 if (backupsource.length !== fragment.source.length) { | |
| 944 deleteFragmentInLabelArray(backupsource, fragment.name, labelArr ay); | |
| 945 } | |
| 946 } | |
| 947 }); | |
| 948 this.fragmentsArray = newFragmentArray; | |
| 949 }; | |
| 950 | |
| 951 // ========================================================================== ============================================ | |
| 952 // ==================================================== Helper Functions ==== ============================================ | |
| 953 // ========================================================================== ============================================ | |
| 954 | |
| 955 /** | |
| 956 * Most Important Helper function | |
| 957 * help function to find the all the fragment of one word from the root of the t ree, and finally link to another suffix | |
| 958 * @param word searching objects | |
| 959 * @param fragments an object to store the sources of the common fragments in th e path to find the word | |
| 960 * @param root start point | |
| 961 * @param length minimum length requirement | |
| 962 * @param occurrence minimum occurrence requirement | |
| 963 * @param [iterate] {Boolean} whether it is the complete word or suffix | |
| 964 */ | |
| 965 function findFragments(word, fragments, root, length, occurrence, iterate) { | |
| 966 | |
| 967 var cur = root; | |
| 968 var letters = word.split(""); | |
| 969 //loop every letter in the suffix, e.g. 'o' in 'oss' | |
| 970 for (var j = 0; j < letters.length; j++) { | |
| 971 var letter = letters[j], pos = cur[ letter ]; | |
| 972 if (j + 1 >= length) { | |
| 973 var fragment = word.substring(0, j + 1); | |
| 974 | |
| 975 if (pos.hasOwnProperty('listed')) { | |
| 976 if (!pos['listed']) { | |
| 977 //if it is a node fulfil our requirement | |
| 978 if (pos.hasOwnProperty('source')) { | |
| 979 fragments[fragment] = {}; | |
| 980 fragments[fragment]['source'] = pos.source; | |
| 981 fragments[fragment]['weight'] = pos['weight']; | |
| 982 | |
| 983 //debug whether we include all the situation. | |
| 984 if (pos['weight'] == null) { | |
| 985 console.warn('escape case! fragment does not has wei ght property, node is ', fragments[fragment]); | |
| 986 } | |
| 987 if (fragments.hasOwnProperty(fragment)) { | |
| 988 //if this is not the full word (but a suffix) we che ck the duplication | |
| 989 if (iterate) { | |
| 990 for (var property in fragments) { | |
| 991 if (property !== fragment && fragments.hasOw nProperty(property) && fragments.hasOwnProperty(fragment)) { | |
| 992 if (property.indexOf(fragment) !== -1) { | |
| 993 fragments[fragment]['source'] = frag ments[fragment]['source'].filter(function (i) { | |
| 994 return this.indexOf(i) < 0 | |
| 995 }, fragments[property].source); | |
| 996 } | |
| 997 } | |
| 998 } | |
| 999 | |
| 1000 if (fragments[fragment]['source'].length < occur rence) { | |
| 1001 delete fragments[fragment]; | |
| 1002 } | |
| 1003 } | |
| 1004 } | |
| 1005 pos['listed'] = true; | |
| 1006 } | |
| 1007 //iterate to its suffix from root | |
| 1008 if (pos.hasOwnProperty('next')) { | |
| 1009 findFragments(pos['next'], fragments, root, length, occu rrence, true); | |
| 1010 } | |
| 1011 } | |
| 1012 } | |
| 1013 } | |
| 1014 cur = pos; | |
| 1015 } | |
| 1016 } | |
| 1017 | |
| 1018 /** | |
| 1019 * A helper function to delete the reference in Label Array | |
| 1020 * @param {Array} source fragment.source | |
| 1021 * @param {String} name fragment.name | |
| 1022 * @param labelArray LabelArray | |
| 1023 */ | |
| 1024 function deleteFragmentInLabelArray(source, name, labelArray) { | |
| 1025 var label = labelArray[source[0]]; | |
| 1026 if (label.hasOwnProperty(name)) { | |
| 1027 delete label[name]; | |
| 1028 } | |
| 1029 } | |
| 1030 | |
| 1031 /** | |
| 1032 * Helper function for build a dictionary trie for searching the fragment. | |
| 1033 * @param trie the fragment trie will be build in this variable. | |
| 1034 * @param array the fragment array used for building trie. | |
| 1035 */ | |
| 1036 function buildFragmentTrie(trie, array) { | |
| 1037 for (var i = 0; i < array.length; i++) { | |
| 1038 | |
| 1039 var word = array[i]['name'], letters = word.split(""), cur = trie; | |
| 1040 | |
| 1041 // Loop through the letters | |
| 1042 for (var j = 0; j < letters.length; j++) { | |
| 1043 var letter = letters[j]; | |
| 1044 | |
| 1045 if (!cur.hasOwnProperty(letter)) { | |
| 1046 cur[ letter ] = {}; | |
| 1047 } | |
| 1048 cur = cur[letter]; | |
| 1049 if (j === letters.length - 1) { | |
| 1050 cur['source'] = array[i]['source']; | |
| 1051 cur['name'] = array[i]['name']; | |
| 1052 cur['weight'] = array[i]['weight']; | |
| 1053 cur['list'] = false; | |
| 1054 } | |
| 1055 | |
| 1056 } | |
| 1057 } | |
| 1058 } | |
| 1059 | |
| 1060 /** | |
| 1061 * Helper function to remove the fragment from the trie and list it in the resul t array (fragmentsArray). | |
| 1062 * @param word the word to be searched for. | |
| 1063 * @param cur the node where the search start. | |
| 1064 * @param array the array to store the result fragment | |
| 1065 * @param labelLengthArray the array map to the length of the label | |
| 1066 * @param rebuildArray this.rebuildArray | |
| 1067 * @param label_trie the binary search trie where store all the labels with thei r length. | |
| 1068 * @returns {Number|Boolean} if find return the length of fragment, else return false. | |
| 1069 */ | |
| 1070 function removeTrie(word, cur, array, labelLengthArray, rebuildArray, label_trie ) { | |
| 1071 for (var node in cur) { | |
| 1072 | |
| 1073 if (cur.hasOwnProperty(node) && node.length === 1) { | |
| 1074 | |
| 1075 if (word.indexOf(node) === 0) { | |
| 1076 //if this is the leaf of the trie | |
| 1077 if (word.length === 1) { | |
| 1078 if (cur[node].hasOwnProperty('list')) { | |
| 1079 var fragment = {}; | |
| 1080 fragment['source'] = cur[node]['source']; | |
| 1081 var fragmentName = cur[node]['name']; | |
| 1082 fragment['name'] = fragmentName; | |
| 1083 fragment['weight'] = cur[node]['weight']; | |
| 1084 delete cur[node]['list']; | |
| 1085 //store it into our result | |
| 1086 array.push(fragment); | |
| 1087 | |
| 1088 //for other node who has same fragment | |
| 1089 cur[node]['source'].forEach(function (indexLabel) { | |
| 1090 //delete the fragment reference in the label | |
| 1091 delete rebuildArray[indexLabel][fragmentName]; | |
| 1092 //update the label length in the trie | |
| 1093 var labelLength = labelLengthArray[indexLabel]; | |
| 1094 label_trie.delete(labelLength, indexLabel); | |
| 1095 labelLength -= fragmentName.length; | |
| 1096 label_trie.insert(labelLength, indexLabel); | |
| 1097 }); | |
| 1098 } | |
| 1099 } else { | |
| 1100 removeTrie(word.slice(1), cur[node], array, labelLengthArray , rebuildArray, label_trie); | |
| 1101 } | |
| 1102 } | |
| 1103 | |
| 1104 } | |
| 1105 } | |
| 1106 } | |
| 1107 | |
| 1108 /** | |
| 1109 * Helper function to remove the repeat elements in an array | |
| 1110 * @param {Array} array | |
| 1111 * @returns {Array} the result array without duplicate elments | |
| 1112 */ | |
| 1113 function uniqueArray(array) { | |
| 1114 var a = array.concat(); | |
| 1115 for (var i = 0; i < a.length; ++i) { | |
| 1116 for (var j = i + 1; j < a.length; ++j) { | |
| 1117 if (a[i] === a[j]) | |
| 1118 a.splice(j--, 1); | |
| 1119 } | |
| 1120 } | |
| 1121 return a; | |
| 1122 } | |
| 1123 | |
| 1124 window.Substrings = SuffixTrie; | |
| 1125 | |
| 1126 })(); | |
| OLD | NEW |