OLD | NEW |
1 // | 1 // |
2 // Copyright 2014 Google Inc. All rights reserved. | 2 // Copyright 2014 Google Inc. All rights reserved. |
3 // | 3 // |
4 // Use of this source code is governed by a BSD-style | 4 // Use of this source code is governed by a BSD-style |
5 // license that can be found in the LICENSE file or at | 5 // license that can be found in the LICENSE file or at |
6 // https://developers.google.com/open-source/licenses/bsd | 6 // https://developers.google.com/open-source/licenses/bsd |
7 // | 7 // |
8 | 8 |
9 library charted.core.text_metrics.segmentation; | 9 library charted.core.text_metrics.segmentation; |
10 | 10 |
11 import "package:charted/core/text_metrics/segmentation_utils.dart"; | 11 import "package:charted/core/text_metrics/segmentation_utils.dart"; |
12 import "package:charted/core/text_metrics/segmentation_data.dart"; | 12 import "package:charted/core/text_metrics/segmentation_data.dart"; |
13 | 13 |
/// Version of the Unicode Character Database that this library's
/// segmentation tables are based on.
///
/// The matching data files are published at
/// http://www.unicode.org/Public/7.0.0/ucd/
const String UNICODE_VERSION = '7.0.0';
17 | 17 |
// Grapheme break lookup table, derived from:
// http://www.unicode.org/Public/7.0.0/ucd/auxiliary/GraphemeBreakTest.html
//
// The table is a flattened 12 x 12 matrix: each row below is one previous
// character type and each column is one current character type, so the entry
// for a pair is GRAPHEME_BREAK_TABLE[prevType * 12 + curType].
// A value of 1 means "break between the two characters"; 0 means "no break".
const GRAPHEME_BREAK_TABLE = const [
  1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, // prevType 0
  1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // prevType 1
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // prevType 2
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // prevType 3
  1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, // prevType 4
  1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, // prevType 5
  1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, // prevType 6
  1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, // prevType 7
  1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, // prevType 8
  1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, // prevType 9
  1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, // prevType 10
  1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0 // prevType 11
];
35 | 167 |
/// Returns the character type recorded in [CODE_POINT_BLOCKS] for [rune].
///
/// [CODE_POINT_BLOCKS] is read as a flat list of `(first, last, type)`
/// triples, where `first` and `last` are the inclusive code point bounds of a
/// block. The binary search below assumes the triples are sorted by code point
/// and do not overlap (TODO: confirm against segmentation_data.dart).
///
/// Returns [CODE_CATEGORY_OTHER] when no block contains [rune].
int _typeForRune(int rune) {
  int min = 0;
  int max = CODE_POINT_BLOCKS.length ~/ 3 - 1;
  while (min <= max) {
    int mid = (min + max) ~/ 2;
    int idx = mid * 3; // Offset of the `first` field of block `mid`.
    if (rune < CODE_POINT_BLOCKS[idx]) {
      // The rune lies before this block: keep searching the lower half.
      max = mid - 1;
    } else if (rune > CODE_POINT_BLOCKS[idx + 1]) {
      // The rune lies after this block: keep searching the upper half.
      // The lower bound must move to just past `mid`; moving it past `max`
      // would empty the search range after a single probe.
      min = mid + 1;
    } else {
      // first <= rune <= last: the rune belongs to this block.
      return CODE_POINT_BLOCKS[idx + 2];
    }
  }
  return CODE_CATEGORY_OTHER; // Defaults to OTHER.
}
56 | 187 |
/// Returns the positions in [s] at which a grapheme break occurs.
///
/// The string is walked rune by rune. For each rune, the pair (type of the
/// previous rune, type of this rune) is looked up in [GRAPHEME_BREAK_TABLE];
/// when the entry is 1, the rune iterator's `rawIndex` for this rune is
/// recorded. For the first rune the previous type is taken to be 0.
Iterable<int> graphemeBreakIndices(String s) {
  final breaks = <int>[];
  final runeIterator = s.runes.iterator;
  var lastType = 0;
  while (runeIterator.moveNext()) {
    final type = _typeForRune(runeIterator.current);
    // Rows of the table are previous types, columns are current types.
    final breaksHere = GRAPHEME_BREAK_TABLE[lastType * 12 + type] == 1;
    if (breaksHere) {
      breaks.add(runeIterator.rawIndex);
    }
    lastType = type;
  }
  return breaks;
}
OLD | NEW |