| OLD | NEW | 
|---|
| 1 // Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file | 
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a | 
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" | 
| 6 | 6 | 
| 7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" | 
| 8 #include "vm/globals.h" | 8 #include "vm/globals.h" | 
| 9 #include "vm/object.h" | 9 #include "vm/object.h" | 
| 10 | 10 | 
| 11 namespace dart { | 11 namespace dart { | 
| (...skipping 12 matching lines...) Expand all  Loading... | 
| 24   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 24   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
| 25   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 25   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
| 26   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 26   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 
| 27   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 27   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 
| 28   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | 28   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | 
| 29   4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 | 29   4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 | 
| 30 }; | 30 }; | 
| 31 | 31 | 
| 32 | 32 | 
| 33 const uint32_t Utf8::kMagicBits[7] = { | 33 const uint32_t Utf8::kMagicBits[7] = { | 
| 34   0,  // padding | 34   0,  // Padding. | 
| 35   0x00000000, | 35   0x00000000, | 
| 36   0x00003080, | 36   0x00003080, | 
| 37   0x000E2080, | 37   0x000E2080, | 
| 38   0x03C82080, | 38   0x03C82080, | 
| 39   0xFA082080, | 39   0xFA082080, | 
| 40   0x82082080 | 40   0x82082080 | 
| 41 }; | 41 }; | 
| 42 | 42 | 
| 43 | 43 | 
| 44 // Minimum values of code points used to check shortest form. | 44 // Minimum values of code points used to check shortest form. | 
| 45 const uint32_t Utf8::kOverlongMinimum[7] = { | 45 const uint32_t Utf8::kOverlongMinimum[7] = { | 
| 46   0,  // padding | 46   0,  // Padding. | 
| 47   0x0, | 47   0x0, | 
| 48   0x80, | 48   0x80, | 
| 49   0x800, | 49   0x800, | 
| 50   0x10000, | 50   0x10000, | 
| 51   0xFFFFFFFF, | 51   0xFFFFFFFF, | 
| 52   0xFFFFFFFF | 52   0xFFFFFFFF | 
| 53 }; | 53 }; | 
| 54 | 54 | 
| 55 | 55 | 
| 56 // Returns a count of the number of UTF-8 trail bytes. | 56 // Returns a count of the number of UTF-8 trail bytes. | 
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 129   String::CodePointIterator it(str); | 129   String::CodePointIterator it(str); | 
| 130   while (it.Next()) { | 130   while (it.Next()) { | 
| 131     int32_t ch = it.Current(); | 131     int32_t ch = it.Current(); | 
| 132     length += Utf8::Length(ch); | 132     length += Utf8::Length(ch); | 
| 133   } | 133   } | 
| 134   return length; | 134   return length; | 
| 135 } | 135 } | 
| 136 | 136 | 
| 137 | 137 | 
| 138 intptr_t Utf8::Encode(int32_t ch, char* dst) { | 138 intptr_t Utf8::Encode(int32_t ch, char* dst) { | 
|  | 139   ASSERT(!Utf16::IsSurrogate(ch)); | 
| 139   static const int kMask = ~(1 << 6); | 140   static const int kMask = ~(1 << 6); | 
| 140   if (ch <= kMaxOneByteChar) { | 141   if (ch <= kMaxOneByteChar) { | 
| 141     dst[0] = ch; | 142     dst[0] = ch; | 
| 142     return 1; | 143     return 1; | 
| 143   } | 144   } | 
| 144   if (ch <= kMaxTwoByteChar) { | 145   if (ch <= kMaxTwoByteChar) { | 
| 145     dst[0] = 0xC0 | (ch >> 6); | 146     dst[0] = 0xC0 | (ch >> 6); | 
| 146     dst[1] = 0x80 | (ch & kMask); | 147     dst[1] = 0x80 | (ch & kMask); | 
| 147     return 2; | 148     return 2; | 
| 148   } | 149   } | 
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 215                           uint8_t* dst, | 216                           uint8_t* dst, | 
| 216                           intptr_t len) { | 217                           intptr_t len) { | 
| 217   intptr_t i = 0; | 218   intptr_t i = 0; | 
| 218   intptr_t j = 0; | 219   intptr_t j = 0; | 
| 219   intptr_t num_bytes; | 220   intptr_t num_bytes; | 
| 220   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 221   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 
| 221     int32_t ch; | 222     int32_t ch; | 
| 222     ASSERT(IsLatin1SequenceStart(utf8_array[i])); | 223     ASSERT(IsLatin1SequenceStart(utf8_array[i])); | 
| 223     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 224     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 
| 224     if (ch == -1) { | 225     if (ch == -1) { | 
| 225       return false;  // invalid input | 226       return false;  // Invalid input. | 
| 226     } | 227     } | 
| 227     ASSERT(Utf::IsLatin1(ch)); | 228     ASSERT(Utf::IsLatin1(ch)); | 
| 228     dst[j] = ch; | 229     dst[j] = ch; | 
| 229   } | 230   } | 
| 230   if ((i < array_len) && (j == len)) { | 231   if ((i < array_len) && (j == len)) { | 
| 231     return false;  // output overflow | 232     return false;  // Output overflow. | 
| 232   } | 233   } | 
| 233   return true;  // success | 234   return true;  // Success. | 
| 234 } | 235 } | 
| 235 | 236 | 
| 236 | 237 | 
| 237 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 238 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 
| 238                          intptr_t array_len, | 239                          intptr_t array_len, | 
| 239                          uint16_t* dst, | 240                          uint16_t* dst, | 
| 240                          intptr_t len) { | 241                          intptr_t len) { | 
| 241   intptr_t i = 0; | 242   intptr_t i = 0; | 
| 242   intptr_t j = 0; | 243   intptr_t j = 0; | 
| 243   intptr_t num_bytes; | 244   intptr_t num_bytes; | 
| 244   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 245   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 
| 245     int32_t ch; | 246     int32_t ch; | 
| 246     bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); | 247     bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); | 
| 247     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 248     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 
| 248     if (ch == -1) { | 249     if (ch == -1) { | 
| 249       return false;  // invalid input | 250       return false;  // Invalid input. | 
| 250     } | 251     } | 
| 251     if (is_supplementary) { | 252     if (is_supplementary) { | 
| 252       Utf16::Encode(ch, &dst[j]); | 253       Utf16::Encode(ch, &dst[j]); | 
| 253       j = j + 1; | 254       j = j + 1; | 
| 254     } else { | 255     } else { | 
| 255       dst[j] = ch; | 256       dst[j] = ch; | 
| 256     } | 257     } | 
| 257   } | 258   } | 
| 258   if ((i < array_len) && (j == len)) { | 259   if ((i < array_len) && (j == len)) { | 
| 259     return false;  // output overflow | 260     return false;  // Output overflow. | 
| 260   } | 261   } | 
| 261   return true;  // success | 262   return true;  // Success. | 
| 262 } | 263 } | 
| 263 | 264 | 
| 264 | 265 | 
| 265 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, | 266 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, | 
| 266                          intptr_t array_len, | 267                          intptr_t array_len, | 
| 267                          int32_t* dst, | 268                          int32_t* dst, | 
| 268                          intptr_t len) { | 269                          intptr_t len) { | 
| 269   intptr_t i = 0; | 270   intptr_t i = 0; | 
| 270   intptr_t j = 0; | 271   intptr_t j = 0; | 
| 271   intptr_t num_bytes; | 272   intptr_t num_bytes; | 
| 272   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 273   for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 
| 273     int32_t ch; | 274     int32_t ch; | 
| 274     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 275     num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 
| 275     if (ch == -1) { | 276     if (ch == -1) { | 
| 276       return false;  // invalid input | 277       return false;  // Invalid input. | 
| 277     } | 278     } | 
| 278     dst[j] = ch; | 279     dst[j] = ch; | 
| 279   } | 280   } | 
| 280   if ((i < array_len) && (j == len)) { | 281   if ((i < array_len) && (j == len)) { | 
| 281     return false;  // output overflow | 282     return false;  // Output overflow. | 
| 282   } | 283   } | 
| 283   return true;  // success | 284   return true;  // Success. | 
| 284 } | 285 } | 
| 285 | 286 | 
| 286 | 287 | 
| 287 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { | 288 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { | 
| 288   ASSERT(str != NULL); | 289   ASSERT(str != NULL); | 
| 289   intptr_t array_len = strlen(str); | 290   intptr_t array_len = strlen(str); | 
| 290   const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); | 291   const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); | 
| 291   return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); | 292   return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); | 
| 292 } | 293 } | 
| 293 | 294 | 
| 294 | 295 | 
| 295 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 
| 296   ASSERT(codepoint > Utf16::kMaxCodeUnit); | 297   ASSERT(codepoint > Utf16::kMaxCodeUnit); | 
| 297   ASSERT(dst != NULL); | 298   ASSERT(dst != NULL); | 
| 298   dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 299   dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 
| 299   dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 300   dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 
| 300 } | 301 } | 
| 301 | 302 | 
| 302 }  // namespace dart | 303 }  // namespace dart | 
| OLD | NEW | 
|---|