| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
| 6 | 6 |
| 7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
| 8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
| 9 #include "vm/object.h" | 9 #include "vm/object.h" |
| 10 | 10 |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 51 0xFFFFFFFF, | 51 0xFFFFFFFF, |
| 52 0xFFFFFFFF | 52 0xFFFFFFFF |
| 53 }; | 53 }; |
| 54 | 54 |
| 55 | 55 |
| 56 static bool IsTrailByte(uint8_t code_unit) { | 56 static bool IsTrailByte(uint8_t code_unit) { |
| 57 return (code_unit & 0xc0) == 0x80; /* true when the top two bits are 10 */ | 57 return (code_unit & 0xc0) == 0x80; /* true when the top two bits are 10 */ |
| 58 } | 58 } |
| 59 | 59 |
| 60 | 60 |
| 61 static bool IsAsciiSequenceStart(uint8_t code_unit) { | 61 static bool IsLatin1SequenceStart(uint8_t code_unit) { |
| 62 // Check if codepoint is <= U+007F | 62 // Check if codepoint is <= U+00FF. NOTE(review): the comparison below is unchanged from the ASCII version; confirm Utf8::kMaxOneByteChar admits the UTF-8 lead bytes of U+0080..U+00FF. |
| 63 return (code_unit <= Utf8::kMaxOneByteChar); | 63 return (code_unit <= Utf8::kMaxOneByteChar); |
| 64 } | 64 } |
| 65 | 65 |
| 66 | 66 |
| 67 static bool IsSmpSequenceStart(uint8_t code_unit) { | 67 static bool IsSmpSequenceStart(uint8_t code_unit) { |
| 68 // Check if codepoint is >= U+10000. | 68 // Check if codepoint is >= U+10000. |
| 69 return (code_unit >= 0xF0); | 69 return (code_unit >= 0xF0); |
| 70 } | 70 } |
| 71 | 71 |
| 72 | 72 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10)); | 94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10)); |
| 95 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 95 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
| 96 } | 96 } |
| 97 | 97 |
| 98 | 98 |
| 99 // Returns the number of non-trail bytes in utf8_array, counting each SMP sequence start twice, and stores the resulting character type in *type. | 99 // Returns the number of non-trail bytes in utf8_array, counting each SMP sequence start twice, and stores the resulting character type in *type. |
| 100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, | 100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, |
| 101 intptr_t array_len, | 101 intptr_t array_len, |
| 102 Type* type) { | 102 Type* type) { |
| 103 intptr_t len = 0; | 103 intptr_t len = 0; |
| 104 Type char_type = kAscii; | 104 Type char_type = kLatin1; |
| 105 for (intptr_t i = 0; i < array_len; i++) { | 105 for (intptr_t i = 0; i < array_len; i++) { |
| 106 uint8_t code_unit = utf8_array[i]; | 106 uint8_t code_unit = utf8_array[i]; |
| 107 if (!IsTrailByte(code_unit)) { | 107 if (!IsTrailByte(code_unit)) { |
| 108 ++len; | 108 ++len; |
| 109 } | 109 } |
| 110 if (!IsAsciiSequenceStart(code_unit)) { // > U+007F | 110 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF |
| 111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 | 111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 |
| 112 char_type = kSMP; | 112 char_type = kSMP; |
| 113 ++len; /* counted a second time; presumably for a UTF-16 surrogate pair */ | 113 ++len; /* counted a second time; presumably for a UTF-16 surrogate pair */ |
| 114 } else if (char_type == kAscii) { | 114 } else if (char_type == kLatin1) { |
| 115 char_type = kBMP; | 115 char_type = kBMP; |
| 116 } | 116 } |
| 117 } | 117 } |
| 118 } | 118 } |
| 119 *type = char_type; | 119 *type = char_type; |
| 120 return len; | 120 return len; |
| 121 } | 121 } |
| 122 | 122 |
| 123 | 123 |
| 124 // Returns true if str is a valid NUL-terminated UTF-8 string. | 124 // Returns true if str is a valid NUL-terminated UTF-8 string. |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 244 !IsSurrogate(ch))) { | 244 !IsSurrogate(ch))) { |
| 245 *dst = -1; | 245 *dst = -1; |
| 246 return 0; | 246 return 0; |
| 247 } | 247 } |
| 248 } | 248 } |
| 249 *dst = ch; | 249 *dst = ch; |
| 250 return i; | 250 return i; |
| 251 } | 251 } |
| 252 | 252 |
| 253 | 253 |
| 254 bool Utf8::DecodeToAscii(const uint8_t* utf8_array, | 254 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, |
| 255 intptr_t array_len, | 255 intptr_t array_len, |
| 256 uint8_t* dst, | 256 uint8_t* dst, |
| 257 intptr_t len) { | 257 intptr_t len) { |
| 258 if (len < array_len) { | 258 intptr_t i = 0; |
| | 259 intptr_t j = 0; |
| | 260 intptr_t num_bytes; |
| | 261 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { /* one output byte per decoded code point */ |
| | 262 int32_t ch; |
| | 263 ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
| | 264 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| | 265 if (ch == -1) { |
| | 266 return false; // invalid input: Utf8::Decode reported -1 |
| | 267 } |
| | 268 ASSERT(ch <= 0xff); |
| | 269 dst[j] = ch; /* narrows int32_t to uint8_t; range is checked only by the ASSERT above */ |
| | 270 } |
| | 271 if ((i < array_len) && (j == len)) { |
| 259 return false; // output overflow | 272 return false; // output overflow: input remains but dst is full |
| 260 } | 273 } |
| 261 #ifdef DEBUG | |
| 262 for (intptr_t i = 0; i < array_len; i++) { | |
| 263 ASSERT(IsAsciiSequenceStart(utf8_array[i])); | |
| 264 } | |
| 265 #endif | |
| 266 memmove(dst, utf8_array, array_len); | |
| 267 return true; // success | 274 return true; // success |
| 268 } | 275 } |
| 269 | 276 |
| 270 | 277 |
| 271 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 278 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
| 272 intptr_t array_len, | 279 intptr_t array_len, |
| 273 uint16_t* dst, | 280 uint16_t* dst, |
| 274 intptr_t len) { | 281 intptr_t len) { |
| 275 intptr_t i = 0; | 282 intptr_t i = 0; |
| 276 intptr_t j = 0; | 283 intptr_t j = 0; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 311 } | 318 } |
| 312 dst[j] = ch; | 319 dst[j] = ch; |
| 313 } | 320 } |
| 314 if ((i < array_len) && (j == len)) { | 321 if ((i < array_len) && (j == len)) { |
| 315 return false; // output overflow | 322 return false; // output overflow |
| 316 } | 323 } |
| 317 return true; // success | 324 return true; // success |
| 318 } | 325 } |
| 319 | 326 |
| 320 } // namespace dart | 327 } // namespace dart |
| OLD | NEW |