OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 // | 4 // |
5 // This file was generated at 2014-10-08 15:25:47.940335 (in v8, copied to dart) | 5 // This file was generated at 2014-10-08 15:25:47.940335 (in v8, copied to dart) |
6 | 6 |
| 7 #include "vm/unibrow.h" |
7 #include "vm/unibrow-inl.h" | 8 #include "vm/unibrow-inl.h" |
8 #include "vm/unibrow.h" | |
9 | 9 |
10 #include <stdio.h> | 10 #include <stdio.h> |
11 #include <stdlib.h> | 11 #include <stdlib.h> |
12 | 12 |
13 namespace unibrow { | 13 namespace unibrow { |
14 | 14 |
// Flag bit (bit 30) carried by table entries: IsStart() tests it, and
// GetEntry() keeps only the bits below it.
15 static const intptr_t kStartBit = (1 << 30); | 15 static const intptr_t kStartBit = (1 << 30); |
// Equal to 1 << 13 (8192). NOTE(review): its users are not visible in this
// excerpt; presumably the size of one table chunk -- confirm.
16 static const intptr_t kChunkBits = (1 << 13); | 16 static const intptr_t kChunkBits = (1 << 13); |
// The value -1 as an int32_t. Re-exported below as
// MultiCharacterSpecialCase::kEndOfEncoding.
17 static const int32_t kSentinel = static_cast<int32_t>(-1); | 17 static const int32_t kSentinel = static_cast<int32_t>(-1); |
18 | 18 |
19 /** | 19 /** |
20 * \file | 20 * \file |
21 * Implementations of functions for working with unicode. | 21 * Implementations of functions for working with unicode. |
22 */ | 22 */ |
23 | 23 |
24 // All access to the character table should go through this function. | 24 // All access to the character table should go through this function. |
// Reads one element of `table` with a compile-time stride: D is multiplied
// into `index`, so the element returned is table[D * index]. No bounds
// checking is performed here; the caller must supply a valid index.
25 template <intptr_t D> | 25 template <intptr_t D> |
26 static inline int32_t TableGet(const int32_t* table, intptr_t index) { | 26 static inline int32_t TableGet(const int32_t* table, intptr_t index) { |
27 return table[D * index]; | 27 return table[D * index]; |
28 } | 28 } |
29 | 29 |
30 | |
// Returns the low 30 bits of a table entry: kStartBit is 1 << 30, so the mask
// (kStartBit - 1) keeps bits 0..29 and clears the start flag and everything
// above it.
31 static inline int32_t GetEntry(int32_t entry) { | 30 static inline int32_t GetEntry(int32_t entry) { |
32 return entry & (kStartBit - 1); | 31 return entry & (kStartBit - 1); |
33 } | 32 } |
34 | 33 |
35 | |
// Returns true when the kStartBit flag (bit 30) is set in a table entry.
// The lookup code visible in this file combines this flag with an
// `entry < value` comparison, i.e. a flagged entry opens a range.
36 static inline bool IsStart(int32_t entry) { | 34 static inline bool IsStart(int32_t entry) { |
37 return (entry & kStartBit) != 0; | 35 return (entry & kStartBit) != 0; |
38 } | 36 } |
39 | 37 |
40 | |
41 /** | 38 /** |
42 * Look up a character in the unicode table using a mix of binary and | 39 * Look up a character in the unicode table using a mix of binary and |
43 * interpolation search. For a uniformly distributed array | 40 * interpolation search. For a uniformly distributed array |
44 * interpolation search beats binary search by a wide margin. However, | 41 * interpolation search beats binary search by a wide margin. However, |
45 * in this case interpolation search degenerates because of some very | 42 * in this case interpolation search degenerates because of some very |
46 * high values in the lower end of the table so this function uses a | 43 * high values in the lower end of the table so this function uses a |
47 * combination. The average number of steps to look up the information | 44 * combination. The average number of steps to look up the information |
48 * about a character is around 10, slightly higher if there is no | 45 * about a character is around 10, slightly higher if there is no |
49 * information available about the character. | 46 * information available about the character. |
50 */ | 47 */ |
(...skipping 26 matching lines...) Expand all Loading... |
77 bool is_start = IsStart(field); | 74 bool is_start = IsStart(field); |
78 return (entry == value) || (entry < value && is_start); | 75 return (entry == value) || (entry < value && is_start); |
79 } | 76 } |
80 | 77 |
// Fixed-width record holding up to kW 32-bit values in `chars`.
// kEndOfEncoding equals kSentinel (-1).
// NOTE(review): presumably kEndOfEncoding terminates a sequence shorter than
// kW; the code that consumes it is not visible in this excerpt -- confirm.
81 template <intptr_t kW> | 78 template <intptr_t kW> |
82 struct MultiCharacterSpecialCase { | 79 struct MultiCharacterSpecialCase { |
83 static const int32_t kEndOfEncoding = kSentinel; | 80 static const int32_t kEndOfEncoding = kSentinel; |
84 int32_t chars[kW]; | 81 int32_t chars[kW]; |
85 }; | 82 }; |
86 | 83 |
87 | |
88 // Look up the mapping for the given character in the specified table, | 84 // Look up the mapping for the given character in the specified table, |
89 // which is of the specified length and uses the specified special case | 85 // which is of the specified length and uses the specified special case |
90 // mapping for multi-char mappings. The next parameter is the character | 86 // mapping for multi-char mappings. The next parameter is the character |
91 // following the one to map. The result will be written into the result | 87 // following the one to map. The result will be written into the result |
92 // buffer and the number of characters written will be returned. Finally, | 88 // buffer and the number of characters written will be returned. Finally, |
93 // if the allow_caching_ptr is non-null then false will be stored in | 89 // if the allow_caching_ptr is non-null then false will be stored in |
94 // it if the result contains multiple characters or depends on the | 90 // it if the result contains multiple characters or depends on the |
95 // context. | 91 // context. |
96 // If ranges are linear, a match between a start and end point is | 92 // If ranges are linear, a match between a start and end point is |
97 // offset by the distance between the match and the start. Otherwise | 93 // offset by the distance between the match and the start. Otherwise |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 default: | 175 default: |
180 return 0; | 176 return 0; |
181 } | 177 } |
182 return -1; | 178 return -1; |
183 } | 179 } |
184 } else { | 180 } else { |
185 return 0; | 181 return 0; |
186 } | 182 } |
187 } | 183 } |
188 | 184 |
189 | |
190 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] | 185 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] |
191 // clang-format off | 186 // clang-format off |
192 static const uint16_t kLetterTable0Size = 431; | 187 static const uint16_t kLetterTable0Size = 431; |
193 static const int32_t kLetterTable0[431] = { | 188 static const int32_t kLetterTable0[431] = { |
194 1073741889, 90, 1073741921, 122, | 189 1073741889, 90, 1073741921, 122, |
195 170, 181, 186, 1073742016, // NOLINT | 190 170, 181, 186, 1073742016, // NOLINT |
196 214, 1073742040, 246, 1073742072, | 191 214, 1073742040, 246, 1073742072, |
197 705, 1073742534, 721, 1073742560, // NOLINT | 192 705, 1073742534, 721, 1073742560, // NOLINT |
198 740, 748, 750, 1073742704, | 193 740, 748, 750, 1073742704, |
199 884, 1073742710, 887, 1073742714, // NOLINT | 194 884, 1073742710, 887, 1073742714, // NOLINT |
(...skipping 1643 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1843 case 7: | 1838 case 7: |
1844 return LookupMapping<false>( | 1839 return LookupMapping<false>( |
1845 kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size, | 1840 kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size, |
1846 kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr); | 1841 kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr); |
1847 default: | 1842 default: |
1848 return 0; | 1843 return 0; |
1849 } | 1844 } |
1850 } | 1845 } |
1851 | 1846 |
1852 } // namespace unibrow | 1847 } // namespace unibrow |
OLD | NEW |