| OLD | NEW |
| (Empty) |
| 1 library levenshtein; | |
| 2 | |
| 3 import 'dart:math' as math; | |
| 4 | |
| 5 /** | |
| 6 * The value returned by [levenshtein] if the distance is determined | |
| 7 * to be over the specified threshold. | |
| 8 */ | |
| 9 const int LEVENSHTEIN_MAX = 1 << 20; | |
| 10 | |
| 11 const int _MAX_VALUE = 1 << 10; | |
| 12 | |
| 13 /** | |
| 14 * Find the Levenshtein distance between two [String]s if it's less than or | |
| 15 * equal to a given threshold. | |
| 16 * | |
| 17 * This is the number of changes needed to change one String into another, | |
| 18 * where each change is a single character modification (deletion, insertion or | |
| 19 * substitution). | |
| 20 * | |
| 21 * This implementation follows from Algorithms on Strings, Trees and Sequences | |
| 22 * by Dan Gusfield and Chas Emerick's implementation of the Levenshtein distance | |
| 23 * algorithm. | |
| 24 */ | |
| 25 int levenshtein(String s, String t, int threshold, {bool caseSensitive: true}) { | |
| 26 if (s == null || t == null) { | |
| 27 throw new ArgumentError('Strings must not be null'); | |
| 28 } | |
| 29 if (threshold < 0) { | |
| 30 throw new ArgumentError('Threshold must not be negative'); | |
| 31 } | |
| 32 | |
| 33 if (!caseSensitive) { | |
| 34 s = s.toLowerCase(); | |
| 35 t = t.toLowerCase(); | |
| 36 } | |
| 37 | |
| 38 int s_len = s.length; | |
| 39 int t_len = t.length; | |
| 40 | |
| 41 // if one string is empty, | |
| 42 // the edit distance is necessarily the length of the other | |
| 43 if (s_len == 0) { | |
| 44 return t_len <= threshold ? t_len : LEVENSHTEIN_MAX; | |
| 45 } | |
| 46 if (t_len == 0) { | |
| 47 return s_len <= threshold ? s_len : LEVENSHTEIN_MAX; | |
| 48 } | |
| 49 // the distance can never be less than abs(s_len - t_len) | |
| 50 if ((s_len - t_len).abs() > threshold) { | |
| 51 return LEVENSHTEIN_MAX; | |
| 52 } | |
| 53 | |
| 54 // swap the two strings to consume less memory | |
| 55 if (s_len > t_len) { | |
| 56 String tmp = s; | |
| 57 s = t; | |
| 58 t = tmp; | |
| 59 s_len = t_len; | |
| 60 t_len = t.length; | |
| 61 } | |
| 62 | |
| 63 // 'previous' cost array, horizontally | |
| 64 List<int> p = new List<int>.filled(s_len + 1, 0); | |
| 65 // cost array, horizontally | |
| 66 List<int> d = new List<int>.filled(s_len + 1, 0); | |
| 67 // placeholder to assist in swapping p and d | |
| 68 List<int> _d; | |
| 69 | |
| 70 // fill in starting table values | |
| 71 int boundary = math.min(s_len, threshold) + 1; | |
| 72 for (int i = 0; i < boundary; i++) { | |
| 73 p[i] = i; | |
| 74 } | |
| 75 | |
| 76 // these fills ensure that the value above the rightmost entry of our | |
| 77 // stripe will be ignored in following loop iterations | |
| 78 _setRange(p, boundary, p.length, _MAX_VALUE); | |
| 79 _setRange(d, 0, d.length, _MAX_VALUE); | |
| 80 | |
| 81 // iterates through t | |
| 82 for (int j = 1; j <= t_len; j++) { | |
| 83 // jth character of t | |
| 84 int t_j = t.codeUnitAt(j - 1); | |
| 85 d[0] = j; | |
| 86 | |
| 87 // compute stripe indices, constrain to array size | |
| 88 int min = math.max(1, j - threshold); | |
| 89 int max = math.min(s_len, j + threshold); | |
| 90 | |
| 91 // the stripe may lead off of the table if s and t are of different sizes | |
| 92 if (min > max) { | |
| 93 return LEVENSHTEIN_MAX; | |
| 94 } | |
| 95 | |
| 96 // ignore entry left of leftmost | |
| 97 if (min > 1) { | |
| 98 d[min - 1] = _MAX_VALUE; | |
| 99 } | |
| 100 | |
| 101 // iterates through [min, max] in s | |
| 102 for (int i = min; i <= max; i++) { | |
| 103 if (s.codeUnitAt(i - 1) == t_j) { | |
| 104 // diagonally left and up | |
| 105 d[i] = p[i - 1]; | |
| 106 } else { | |
| 107 // 1 + minimum of cell to the left, to the top, diagonally left and up | |
| 108 d[i] = 1 + math.min(math.min(d[i - 1], p[i]), p[i - 1]); | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 // copy current distance counts to 'previous row' distance counts | |
| 113 _d = p; | |
| 114 p = d; | |
| 115 d = _d; | |
| 116 } | |
| 117 | |
| 118 // if p[n] is greater than the threshold, | |
| 119 // there's no guarantee on it being the correct distance | |
| 120 if (p[s_len] <= threshold) { | |
| 121 return p[s_len]; | |
| 122 } | |
| 123 | |
| 124 return LEVENSHTEIN_MAX; | |
| 125 } | |
| 126 | |
| 127 void _setRange(List<int> a, int start, int end, int value) { | |
| 128 for (int i = start; i < end; i++) { | |
| 129 a[i] = value; | |
| 130 } | |
| 131 } | |
| OLD | NEW |