runtime/vm/unicode.cc - Issue 11280150: Add support for surrogates when serializing and deserializing for native ports

Side by Side Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Rebased to r15579. Created 8 years ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/unicode.h"	5 #include "vm/unicode.h"

6	6

7 #include "vm/allocation.h"	7 #include "vm/allocation.h"

8 #include "vm/globals.h"	8 #include "vm/globals.h"

9 #include "vm/object.h"	9 #include "vm/object.h"

10	10

11 namespace dart {	11 namespace dart {

(...skipping 12 matching lines...) Expand all Loading...
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0	29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0

30 };	30 };

31	31

32	32

33 const uint32_t Utf8::kMagicBits[7] = {	33 const uint32_t Utf8::kMagicBits[7] = {

34 0, // padding	34 0, // Padding.

35 0x00000000,	35 0x00000000,

36 0x00003080,	36 0x00003080,

37 0x000E2080,	37 0x000E2080,

38 0x03C82080,	38 0x03C82080,

39 0xFA082080,	39 0xFA082080,

40 0x82082080	40 0x82082080

41 };	41 };

42	42

43	43

44 // Minimum values of code points used to check shortest form.	44 // Minimum values of code points used to check shortest form.

45 const uint32_t Utf8::kOverlongMinimum[7] = {	45 const uint32_t Utf8::kOverlongMinimum[7] = {

46 0, // padding	46 0, // Padding.

47 0x0,	47 0x0,

48 0x80,	48 0x80,

49 0x800,	49 0x800,

50 0x10000,	50 0x10000,

51 0xFFFFFFFF,	51 0xFFFFFFFF,

52 0xFFFFFFFF	52 0xFFFFFFFF

53 };	53 };

54	54

55	55

56 // Returns a count of the number of UTF-8 trail bytes.	56 // Returns a count of the number of UTF-8 trail bytes.

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
129 String::CodePointIterator it(str);	129 String::CodePointIterator it(str);

130 while (it.Next()) {	130 while (it.Next()) {

131 int32_t ch = it.Current();	131 int32_t ch = it.Current();

132 length += Utf8::Length(ch);	132 length += Utf8::Length(ch);

133 }	133 }

134 return length;	134 return length;

135 }	135 }

136	136

137	137

138 intptr_t Utf8::Encode(int32_t ch, char* dst) {	138 intptr_t Utf8::Encode(int32_t ch, char* dst) {

	139 ASSERT(!Utf16::IsSurrogate(ch));

139 static const int kMask = ~(1 << 6);	140 static const int kMask = ~(1 << 6);

140 if (ch <= kMaxOneByteChar) {	141 if (ch <= kMaxOneByteChar) {

141 dst[0] = ch;	142 dst[0] = ch;

142 return 1;	143 return 1;

143 }	144 }

144 if (ch <= kMaxTwoByteChar) {	145 if (ch <= kMaxTwoByteChar) {

145 dst[0] = 0xC0 | (ch >> 6);	146 dst[0] = 0xC0 | (ch >> 6);

146 dst[1] = 0x80 | (ch & kMask);	147 dst[1] = 0x80 | (ch & kMask);

147 return 2;	148 return 2;

148 }	149 }

(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
215 uint8_t* dst,	216 uint8_t* dst,

216 intptr_t len) {	217 intptr_t len) {

217 intptr_t i = 0;	218 intptr_t i = 0;

218 intptr_t j = 0;	219 intptr_t j = 0;

219 intptr_t num_bytes;	220 intptr_t num_bytes;

220 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	221 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

221 int32_t ch;	222 int32_t ch;

222 ASSERT(IsLatin1SequenceStart(utf8_array[i]));	223 ASSERT(IsLatin1SequenceStart(utf8_array[i]));

223 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	224 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

224 if (ch == -1) {	225 if (ch == -1) {

225 return false; // invalid input	226 return false; // Invalid input.

226 }	227 }

227 ASSERT(Utf::IsLatin1(ch));	228 ASSERT(Utf::IsLatin1(ch));

228 dst[j] = ch;	229 dst[j] = ch;

229 }	230 }

230 if ((i < array_len) && (j == len)) {	231 if ((i < array_len) && (j == len)) {

231 return false; // output overflow	232 return false; // Output overflow.

232 }	233 }

233 return true; // success	234 return true; // Success.

234 }	235 }

235	236

236	237

237 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,	238 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

238 intptr_t array_len,	239 intptr_t array_len,

239 uint16_t* dst,	240 uint16_t* dst,

240 intptr_t len) {	241 intptr_t len) {

241 intptr_t i = 0;	242 intptr_t i = 0;

242 intptr_t j = 0;	243 intptr_t j = 0;

243 intptr_t num_bytes;	244 intptr_t num_bytes;

244 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	245 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

245 int32_t ch;	246 int32_t ch;

246 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);	247 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);

247 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	248 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

248 if (ch == -1) {	249 if (ch == -1) {

249 return false; // invalid input	250 return false; // Invalid input.

250 }	251 }

251 if (is_supplementary) {	252 if (is_supplementary) {

252 Utf16::Encode(ch, &dst[j]);	253 Utf16::Encode(ch, &dst[j]);

253 j = j + 1;	254 j = j + 1;

254 } else {	255 } else {

255 dst[j] = ch;	256 dst[j] = ch;

256 }	257 }

257 }	258 }

258 if ((i < array_len) && (j == len)) {	259 if ((i < array_len) && (j == len)) {

259 return false; // output overflow	260 return false; // Output overflow.

260 }	261 }

261 return true; // success	262 return true; // Success.

262 }	263 }

263	264

264	265

265 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,	266 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,

266 intptr_t array_len,	267 intptr_t array_len,

267 int32_t* dst,	268 int32_t* dst,

268 intptr_t len) {	269 intptr_t len) {

269 intptr_t i = 0;	270 intptr_t i = 0;

270 intptr_t j = 0;	271 intptr_t j = 0;

271 intptr_t num_bytes;	272 intptr_t num_bytes;

272 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

273 int32_t ch;	274 int32_t ch;

274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	275 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

275 if (ch == -1) {	276 if (ch == -1) {

276 return false; // invalid input	277 return false; // Invalid input.

277 }	278 }

278 dst[j] = ch;	279 dst[j] = ch;

279 }	280 }

280 if ((i < array_len) && (j == len)) {	281 if ((i < array_len) && (j == len)) {

281 return false; // output overflow	282 return false; // Output overflow.

282 }	283 }

283 return true; // success	284 return true; // Success.

284 }	285 }

285	286

286	287

287 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) {	288 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) {

288 ASSERT(str != NULL);	289 ASSERT(str != NULL);

289 intptr_t array_len = strlen(str);	290 intptr_t array_len = strlen(str);

290 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);	291 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);

291 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len);	292 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len);

292 }	293 }

293	294

294	295

295 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {	296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {

296 ASSERT(codepoint > Utf16::kMaxCodeUnit);	297 ASSERT(codepoint > Utf16::kMaxCodeUnit);

297 ASSERT(dst != NULL);	298 ASSERT(dst != NULL);

298 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));	299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));

299 dst[1] = (0xDC00 + (codepoint & 0x3FF));	300 dst[1] = (0xDC00 + (codepoint & 0x3FF));

300 }	301 }

301	302

302 } // namespace dart	303 } // namespace dart

OLD	NEW

« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »