| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This file was generated at 2014-10-08 15:25:47.940335 (in v8, copied to dart) | 5 // This file was generated at 2014-10-08 15:25:47.940335 (in v8, copied to dart) |
| 6 | 6 |
| 7 #include "vm/unibrow.h" |
| 7 #include "vm/unibrow-inl.h" | 8 #include "vm/unibrow-inl.h" |
| 8 #include "vm/unibrow.h" | |
| 9 | 9 |
| 10 #include <stdio.h> | 10 #include <stdio.h> |
| 11 #include <stdlib.h> | 11 #include <stdlib.h> |
| 12 | 12 |
| 13 namespace unibrow { | 13 namespace unibrow { |
| 14 | 14 |
| 15 static const intptr_t kStartBit = (1 << 30); | 15 static const intptr_t kStartBit = (1 << 30); |
| 16 static const intptr_t kChunkBits = (1 << 13); | 16 static const intptr_t kChunkBits = (1 << 13); |
| 17 static const int32_t kSentinel = static_cast<int32_t>(-1); | 17 static const int32_t kSentinel = static_cast<int32_t>(-1); |
| 18 | 18 |
| 19 /** | 19 /** |
| 20 * \file | 20 * \file |
| 21 * Implementations of functions for working with unicode. | 21 * Implementations of functions for working with unicode. |
| 22 */ | 22 */ |
| 23 | 23 |
| 24 // All access to the character table should go through this function. | 24 // All access to the character table should go through this function. |
| 25 template <intptr_t D> | 25 template <intptr_t D> |
| 26 static inline int32_t TableGet(const int32_t* table, intptr_t index) { | 26 static inline int32_t TableGet(const int32_t* table, intptr_t index) { |
| 27 return table[D * index]; | 27 return table[D * index]; |
| 28 } | 28 } |
| 29 | 29 |
| 30 | |
| 31 static inline int32_t GetEntry(int32_t entry) { | 30 static inline int32_t GetEntry(int32_t entry) { |
| 32 return entry & (kStartBit - 1); | 31 return entry & (kStartBit - 1); |
| 33 } | 32 } |
| 34 | 33 |
| 35 | |
| 36 static inline bool IsStart(int32_t entry) { | 34 static inline bool IsStart(int32_t entry) { |
| 37 return (entry & kStartBit) != 0; | 35 return (entry & kStartBit) != 0; |
| 38 } | 36 } |
| 39 | 37 |
| 40 | |
| 41 /** | 38 /** |
| 42 * Look up a character in the unicode table using a mix of binary and | 39 * Look up a character in the unicode table using a mix of binary and |
| 43 * interpolation search. For a uniformly distributed array | 40 * interpolation search. For a uniformly distributed array |
| 44 * interpolation search beats binary search by a wide margin. However, | 41 * interpolation search beats binary search by a wide margin. However, |
| 45 * in this case interpolation search degenerates because of some very | 42 * in this case interpolation search degenerates because of some very |
| 46 * high values in the lower end of the table so this function uses a | 43 * high values in the lower end of the table so this function uses a |
| 47 * combination. The average number of steps to look up the information | 44 * combination. The average number of steps to look up the information |
| 48 * about a character is around 10, slightly higher if there is no | 45 * about a character is around 10, slightly higher if there is no |
| 49 * information available about the character. | 46 * information available about the character. |
| 50 */ | 47 */ |
| (...skipping 26 matching lines...) Expand all Loading... |
| 77 bool is_start = IsStart(field); | 74 bool is_start = IsStart(field); |
| 78 return (entry == value) || (entry < value && is_start); | 75 return (entry == value) || (entry < value && is_start); |
| 79 } | 76 } |
| 80 | 77 |
| 81 template <intptr_t kW> | 78 template <intptr_t kW> |
| 82 struct MultiCharacterSpecialCase { | 79 struct MultiCharacterSpecialCase { |
| 83 static const int32_t kEndOfEncoding = kSentinel; | 80 static const int32_t kEndOfEncoding = kSentinel; |
| 84 int32_t chars[kW]; | 81 int32_t chars[kW]; |
| 85 }; | 82 }; |
| 86 | 83 |
| 87 | |
| 88 // Look up the mapping for the given character in the specified table, | 84 // Look up the mapping for the given character in the specified table, |
| 89 // which is of the specified length and uses the specified special case | 85 // which is of the specified length and uses the specified special case |
| 90 // mapping for multi-char mappings. The next parameter is the character | 86 // mapping for multi-char mappings. The next parameter is the character |
| 91 // following the one to map. The result will be written into the result | 87 // following the one to map. The result will be written into the result |
| 92 // buffer and the number of characters written will be returned. Finally, | 88 // buffer and the number of characters written will be returned. Finally, |
| 93 // if the allow_caching_ptr is non-null then false will be stored in | 89 // if the allow_caching_ptr is non-null then false will be stored in |
| 94 // it if the result contains multiple characters or depends on the | 90 // it if the result contains multiple characters or depends on the |
| 95 // context. | 91 // context. |
| 96 // If ranges are linear, a match between a start and end point is | 92 // If ranges are linear, a match between a start and end point is |
| 97 // offset by the distance between the match and the start. Otherwise | 93 // offset by the distance between the match and the start. Otherwise |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 179 default: | 175 default: |
| 180 return 0; | 176 return 0; |
| 181 } | 177 } |
| 182 return -1; | 178 return -1; |
| 183 } | 179 } |
| 184 } else { | 180 } else { |
| 185 return 0; | 181 return 0; |
| 186 } | 182 } |
| 187 } | 183 } |
| 188 | 184 |
| 189 | |
| 190 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] | 185 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] |
| 191 // clang-format off | 186 // clang-format off |
| 192 static const uint16_t kLetterTable0Size = 431; | 187 static const uint16_t kLetterTable0Size = 431; |
| 193 static const int32_t kLetterTable0[431] = { | 188 static const int32_t kLetterTable0[431] = { |
| 194 1073741889, 90, 1073741921, 122, | 189 1073741889, 90, 1073741921, 122, |
| 195 170, 181, 186, 1073742016, // NOLINT | 190 170, 181, 186, 1073742016, // NOLINT |
| 196 214, 1073742040, 246, 1073742072, | 191 214, 1073742040, 246, 1073742072, |
| 197 705, 1073742534, 721, 1073742560, // NOLINT | 192 705, 1073742534, 721, 1073742560, // NOLINT |
| 198 740, 748, 750, 1073742704, | 193 740, 748, 750, 1073742704, |
| 199 884, 1073742710, 887, 1073742714, // NOLINT | 194 884, 1073742710, 887, 1073742714, // NOLINT |
| (...skipping 1643 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1843 case 7: | 1838 case 7: |
| 1844 return LookupMapping<false>( | 1839 return LookupMapping<false>( |
| 1845 kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size, | 1840 kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size, |
| 1846 kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr); | 1841 kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr); |
| 1847 default: | 1842 default: |
| 1848 return 0; | 1843 return 0; |
| 1849 } | 1844 } |
| 1850 } | 1845 } |
| 1851 | 1846 |
| 1852 } // namespace unibrow | 1847 } // namespace unibrow |
| OLD | NEW |