| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // This file was generated at 2014-10-08 15:25:47.940335 | 5 // This file was generated at 2014-10-08 15:25:47.940335 |
| 6 | 6 |
| 7 #include "src/unicode.h" | 7 #include "src/unicode.h" |
| 8 #include "src/unicode-inl.h" | 8 #include "src/unicode-inl.h" |
| 9 #include <stdio.h> | 9 #include <stdio.h> |
| 10 #include <stdlib.h> | 10 #include <stdlib.h> |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 228 // This method decodes a UTF-8 value according to RFC 3629. | 228 // This method decodes a UTF-8 value according to RFC 3629. |
| 229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { | 229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { |
| 230 size_t length = NonASCIISequenceLength(str[0]); | 230 size_t length = NonASCIISequenceLength(str[0]); |
| 231 | 231 |
| 232 // Check continuation characters. | 232 // Check continuation characters. |
| 233 size_t max_count = std::min(length, max_length); | 233 size_t max_count = std::min(length, max_length); |
| 234 size_t count = 1; | 234 size_t count = 1; |
| 235 while (count < max_count && IsContinuationCharacter(str[count])) { | 235 while (count < max_count && IsContinuationCharacter(str[count])) { |
| 236 count++; | 236 count++; |
| 237 } | 237 } |
| 238 *cursor += count; |
| 238 | 239 |
| 239 // Check overly long sequences & other conditions. Use length as error | 240 // There must be enough continuation characters. |
| 240 // indicator. | 241 if (count != length) return kBadChar; |
| 242 |
| 243 // Check overly long sequences & other conditions. |
| 241 if (length == 3) { | 244 if (length == 3) { |
| 242 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { | 245 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { |
| 243 // Overlong three-byte sequence? | 246 // Overlong three-byte sequence? |
| 244 length = 0; | 247 return kBadChar; |
| 245 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { | 248 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { |
| 246 // High and low surrogate halves? | 249 // High and low surrogate halves? |
| 247 length = 0; | 250 return kBadChar; |
| 248 } | 251 } |
| 249 } else if (length == 4) { | 252 } else if (length == 4) { |
| 250 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { | 253 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { |
| 251 // Overlong four-byte sequence. | 254 // Overlong four-byte sequence. |
| 252 length = 0; | 255 return kBadChar; |
| 253 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { | 256 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { |
| 254 // Code points outside of the unicode range. | 257 // Code points outside of the unicode range. |
| 255 length = 0; | 258 return kBadChar; |
| 256 } | 259 } |
| 257 } | 260 } |
| 258 | 261 |
| 259 if (count != length) { | |
| 260 // All invalid encodings should land here. | |
| 261 *cursor += count; | |
| 262 return kBadChar; | |
| 263 } | |
| 264 | |
| 265 // All errors have been handled, so we only have to assemble the result. | 262 // All errors have been handled, so we only have to assemble the result. |
| 266 *cursor += length; | |
| 267 switch (length) { | 263 switch (length) { |
| 268 case 1: | 264 case 1: |
| 269 return str[0]; | 265 return str[0]; |
| 270 case 2: | 266 case 2: |
| 271 return ((str[0] << 6) + str[1]) - 0x00003080; | 267 return ((str[0] << 6) + str[1]) - 0x00003080; |
| 272 case 3: | 268 case 3: |
| 273 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; | 269 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; |
| 274 case 4: | 270 case 4: |
| 275 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - | 271 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - |
| 276 0x03C82080; | 272 0x03C82080; |
| (...skipping 3230 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3507 sizeof(MultiCharacterSpecialCase<1>) // NOLINT | 3503 sizeof(MultiCharacterSpecialCase<1>) // NOLINT |
| 3508 + | 3504 + |
| 3509 kCanonicalizationRangeMultiStrings1Size * | 3505 kCanonicalizationRangeMultiStrings1Size * |
| 3510 sizeof(MultiCharacterSpecialCase<1>) // NOLINT | 3506 sizeof(MultiCharacterSpecialCase<1>) // NOLINT |
| 3511 + | 3507 + |
| 3512 kCanonicalizationRangeMultiStrings7Size * | 3508 kCanonicalizationRangeMultiStrings7Size * |
| 3513 sizeof(MultiCharacterSpecialCase<1>); // NOLINT | 3509 sizeof(MultiCharacterSpecialCase<1>); // NOLINT |
| 3514 } | 3510 } |
| 3515 | 3511 |
| 3516 } // namespace unibrow | 3512 } // namespace unibrow |
| OLD | NEW |