| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
| 6 | 6 |
| 7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
| 8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
| 9 #include "vm/object.h" | 9 #include "vm/object.h" |
| 10 | 10 |
| 11 namespace dart { | 11 namespace dart { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | 28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
| 29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 | 29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 |
| 30 }; | 30 }; |
| 31 | 31 |
| 32 | 32 |
| 33 const uint32_t Utf8::kMagicBits[7] = { | 33 const uint32_t Utf8::kMagicBits[7] = { |
| 34 0, // padding | 34 0, // Padding. |
| 35 0x00000000, | 35 0x00000000, |
| 36 0x00003080, | 36 0x00003080, |
| 37 0x000E2080, | 37 0x000E2080, |
| 38 0x03C82080, | 38 0x03C82080, |
| 39 0xFA082080, | 39 0xFA082080, |
| 40 0x82082080 | 40 0x82082080 |
| 41 }; | 41 }; |
| 42 | 42 |
| 43 | 43 |
| 44 // Minimum values of code points used to check shortest form. | 44 // Minimum values of code points used to check shortest form. |
| 45 const uint32_t Utf8::kOverlongMinimum[7] = { | 45 const uint32_t Utf8::kOverlongMinimum[7] = { |
| 46 0, // padding | 46 0, // Padding. |
| 47 0x0, | 47 0x0, |
| 48 0x80, | 48 0x80, |
| 49 0x800, | 49 0x800, |
| 50 0x10000, | 50 0x10000, |
| 51 0xFFFFFFFF, | 51 0xFFFFFFFF, |
| 52 0xFFFFFFFF | 52 0xFFFFFFFF |
| 53 }; | 53 }; |
| 54 | 54 |
| 55 | 55 |
| 56 // Returns a count of the number of UTF-8 trail bytes. | 56 // Returns a count of the number of UTF-8 trail bytes. |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 String::CodePointIterator it(str); | 129 String::CodePointIterator it(str); |
| 130 while (it.Next()) { | 130 while (it.Next()) { |
| 131 int32_t ch = it.Current(); | 131 int32_t ch = it.Current(); |
| 132 length += Utf8::Length(ch); | 132 length += Utf8::Length(ch); |
| 133 } | 133 } |
| 134 return length; | 134 return length; |
| 135 } | 135 } |
| 136 | 136 |
| 137 | 137 |
| 138 intptr_t Utf8::Encode(int32_t ch, char* dst) { | 138 intptr_t Utf8::Encode(int32_t ch, char* dst) { |
| 139 ASSERT(!Utf16::IsSurrogate(ch)); |
| 139 static const int kMask = ~(1 << 6); | 140 static const int kMask = ~(1 << 6); |
| 140 if (ch <= kMaxOneByteChar) { | 141 if (ch <= kMaxOneByteChar) { |
| 141 dst[0] = ch; | 142 dst[0] = ch; |
| 142 return 1; | 143 return 1; |
| 143 } | 144 } |
| 144 if (ch <= kMaxTwoByteChar) { | 145 if (ch <= kMaxTwoByteChar) { |
| 145 dst[0] = 0xC0 | (ch >> 6); | 146 dst[0] = 0xC0 | (ch >> 6); |
| 146 dst[1] = 0x80 | (ch & kMask); | 147 dst[1] = 0x80 | (ch & kMask); |
| 147 return 2; | 148 return 2; |
| 148 } | 149 } |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 215 uint8_t* dst, | 216 uint8_t* dst, |
| 216 intptr_t len) { | 217 intptr_t len) { |
| 217 intptr_t i = 0; | 218 intptr_t i = 0; |
| 218 intptr_t j = 0; | 219 intptr_t j = 0; |
| 219 intptr_t num_bytes; | 220 intptr_t num_bytes; |
| 220 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 221 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
| 221 int32_t ch; | 222 int32_t ch; |
| 222 ASSERT(IsLatin1SequenceStart(utf8_array[i])); | 223 ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
| 223 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 224 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| 224 if (ch == -1) { | 225 if (ch == -1) { |
| 225 return false; // invalid input | 226 return false; // Invalid input. |
| 226 } | 227 } |
| 227 ASSERT(Utf::IsLatin1(ch)); | 228 ASSERT(Utf::IsLatin1(ch)); |
| 228 dst[j] = ch; | 229 dst[j] = ch; |
| 229 } | 230 } |
| 230 if ((i < array_len) && (j == len)) { | 231 if ((i < array_len) && (j == len)) { |
| 231 return false; // output overflow | 232 return false; // Output overflow. |
| 232 } | 233 } |
| 233 return true; // success | 234 return true; // Success. |
| 234 } | 235 } |
| 235 | 236 |
| 236 | 237 |
| 237 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 238 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
| 238 intptr_t array_len, | 239 intptr_t array_len, |
| 239 uint16_t* dst, | 240 uint16_t* dst, |
| 240 intptr_t len) { | 241 intptr_t len) { |
| 241 intptr_t i = 0; | 242 intptr_t i = 0; |
| 242 intptr_t j = 0; | 243 intptr_t j = 0; |
| 243 intptr_t num_bytes; | 244 intptr_t num_bytes; |
| 244 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 245 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
| 245 int32_t ch; | 246 int32_t ch; |
| 246 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); | 247 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); |
| 247 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 248 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| 248 if (ch == -1) { | 249 if (ch == -1) { |
| 249 return false; // invalid input | 250 return false; // Invalid input. |
| 250 } | 251 } |
| 251 if (is_supplementary) { | 252 if (is_supplementary) { |
| 252 Utf16::Encode(ch, &dst[j]); | 253 Utf16::Encode(ch, &dst[j]); |
| 253 j = j + 1; | 254 j = j + 1; |
| 254 } else { | 255 } else { |
| 255 dst[j] = ch; | 256 dst[j] = ch; |
| 256 } | 257 } |
| 257 } | 258 } |
| 258 if ((i < array_len) && (j == len)) { | 259 if ((i < array_len) && (j == len)) { |
| 259 return false; // output overflow | 260 return false; // Output overflow. |
| 260 } | 261 } |
| 261 return true; // success | 262 return true; // Success. |
| 262 } | 263 } |
| 263 | 264 |
| 264 | 265 |
| 265 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, | 266 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, |
| 266 intptr_t array_len, | 267 intptr_t array_len, |
| 267 int32_t* dst, | 268 int32_t* dst, |
| 268 intptr_t len) { | 269 intptr_t len) { |
| 269 intptr_t i = 0; | 270 intptr_t i = 0; |
| 270 intptr_t j = 0; | 271 intptr_t j = 0; |
| 271 intptr_t num_bytes; | 272 intptr_t num_bytes; |
| 272 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
| 273 int32_t ch; | 274 int32_t ch; |
| 274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 275 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| 275 if (ch == -1) { | 276 if (ch == -1) { |
| 276 return false; // invalid input | 277 return false; // Invalid input. |
| 277 } | 278 } |
| 278 dst[j] = ch; | 279 dst[j] = ch; |
| 279 } | 280 } |
| 280 if ((i < array_len) && (j == len)) { | 281 if ((i < array_len) && (j == len)) { |
| 281 return false; // output overflow | 282 return false; // Output overflow. |
| 282 } | 283 } |
| 283 return true; // success | 284 return true; // Success. |
| 284 } | 285 } |
| 285 | 286 |
| 286 | 287 |
| 287 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { | 288 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { |
| 288 ASSERT(str != NULL); | 289 ASSERT(str != NULL); |
| 289 intptr_t array_len = strlen(str); | 290 intptr_t array_len = strlen(str); |
| 290 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); | 291 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); |
| 291 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); | 292 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); |
| 292 } | 293 } |
| 293 | 294 |
| 294 | 295 |
| 295 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { |
| 296 ASSERT(codepoint > Utf16::kMaxCodeUnit); | 297 ASSERT(codepoint > Utf16::kMaxCodeUnit); |
| 297 ASSERT(dst != NULL); | 298 ASSERT(dst != NULL); |
| 298 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); |
| 299 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 300 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
| 300 } | 301 } |
| 301 | 302 |
| 302 } // namespace dart | 303 } // namespace dart |
| OLD | NEW |