runtime/vm/unicode.cc - Issue 11280150: Add support for surrogates when serializing and deserializing for native ports

Side by Side Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed). Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: "Use iterator reset" (created 8 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/unicode.h"	5 #include "vm/unicode.h"

6	6

7 #include "vm/allocation.h"	7 #include "vm/allocation.h"

8 #include "vm/globals.h"	8 #include "vm/globals.h"

9 #include "vm/object.h"	9 #include "vm/object.h"

10	10

11 namespace dart {	11 namespace dart {

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
52 0xFFFFFFFF	52 0xFFFFFFFF

53 };	53 };

54	54

55	55

56 static bool IsTrailByte(uint8_t code_unit) {	56 static bool IsTrailByte(uint8_t code_unit) {

57 return (code_unit & 0xc0) == 0x80;	57 return (code_unit & 0xc0) == 0x80;

58 }	58 }

59	59

60	60

61 static bool IsLatin1SequenceStart(uint8_t code_unit) {	61 static bool IsLatin1SequenceStart(uint8_t code_unit) {

62 // Check is codepoint is <= U+00FF	62 // Check is codepoint is <= U+00FF.
	siva 2012/11/28 18:22:46: Check if codepoint is ... | Søren Gjesse 2012/11/29 09:06:14: Done. (On 2012/11/28 18:22:46, siva wrote: > Check if codepoint is ...)
63 return (code_unit <= Utf8::kMaxOneByteChar);	63 return (code_unit <= Utf8::kMaxOneByteChar);

64 }	64 }

65	65

66	66

67 static bool IsSupplementarySequenceStart(uint8_t code_unit) {	67 static bool IsSupplementarySequenceStart(uint8_t code_unit) {

68 // Check is codepoint is >= U+10000.	68 // Check is codepoint is >= U+10000.

69 return (code_unit >= 0xF0);	69 return (code_unit >= 0xF0);

70 }	70 }

71	71

72	72

(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
122 is_malformed |= !IsTrailByte(code_unit);	122 is_malformed |= !IsTrailByte(code_unit);

123 ch = (ch << 6) + code_unit;	123 ch = (ch << 6) + code_unit;

124 } else {	124 } else {

125 return false;	125 return false;

126 }	126 }

127 }	127 }

128 ch -= kMagicBits[num_trail_bytes];	128 ch -= kMagicBits[num_trail_bytes];

129 if (!((is_malformed == false) &&	129 if (!((is_malformed == false) &&

130 (j == num_trail_bytes) &&	130 (j == num_trail_bytes) &&

131 !IsOutOfRange(ch) &&	131 !IsOutOfRange(ch) &&

132 !IsNonShortestForm(ch, j) &&	132 !IsNonShortestForm(ch, j))) {

133 !Utf16::IsSurrogate(ch))) {

134 return false;	133 return false;

135 }	134 }

136 }	135 }

137 i += j;	136 i += j;

138 }	137 }

139 return true;	138 return true;

140 }	139 }

141	140

142	141

143 intptr_t Utf8::Length(int32_t ch) {	142 intptr_t Utf8::Length(int32_t ch) {

(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
221 ch = (ch << 6) + code_unit;	220 ch = (ch << 6) + code_unit;

222 } else {	221 } else {

223 *dst = -1;	222 *dst = -1;

224 return 0;	223 return 0;

225 }	224 }

226 }	225 }

227 ch -= kMagicBits[num_trail_bytes];	226 ch -= kMagicBits[num_trail_bytes];

228 if (!((is_malformed == false) &&	227 if (!((is_malformed == false) &&

229 (i == num_trail_bytes) &&	228 (i == num_trail_bytes) &&

230 !IsOutOfRange(ch) &&	229 !IsOutOfRange(ch) &&

231 !IsNonShortestForm(ch, i) &&	230 !IsNonShortestForm(ch, i))) {

232 !Utf16::IsSurrogate(ch))) {

233 *dst = -1;	231 *dst = -1;

234 return 0;	232 return 0;

235 }	233 }

236 }	234 }

237 *dst = ch;	235 *dst = ch;

238 return i;	236 return i;

239 }	237 }

240	238

241	239

242 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,	240 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,

243 intptr_t array_len,	241 intptr_t array_len,

244 uint8_t* dst,	242 uint8_t* dst,

245 intptr_t len) {	243 intptr_t len) {

246 intptr_t i = 0;	244 intptr_t i = 0;

247 intptr_t j = 0;	245 intptr_t j = 0;

248 intptr_t num_bytes;	246 intptr_t num_bytes;

249 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	247 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

250 int32_t ch;	248 int32_t ch;

251 ASSERT(IsLatin1SequenceStart(utf8_array[i]));	249 ASSERT(IsLatin1SequenceStart(utf8_array[i]));

252 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	250 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

253 if (ch == -1) {	251 if (ch == -1) {

254 return false; // invalid input	252 return false; // Invalid input.

255 }	253 }

256 ASSERT(ch <= 0xff);	254 ASSERT(ch <= 0xff);

257 dst[j] = ch;	255 dst[j] = ch;

258 }	256 }

259 if ((i < array_len) && (j == len)) {	257 if ((i < array_len) && (j == len)) {

260 return false; // output overflow	258 return false; // Output overflow.

261 }	259 }

262 return true; // success	260 return true; // Success.

263 }	261 }

264	262

265	263

266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,	264 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

267 intptr_t array_len,	265 intptr_t array_len,

268 uint16_t* dst,	266 uint16_t* dst,

269 intptr_t len) {	267 intptr_t len) {

270 intptr_t i = 0;	268 intptr_t i = 0;

271 intptr_t j = 0;	269 intptr_t j = 0;

272 intptr_t num_bytes;	270 intptr_t num_bytes;

273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	271 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

274 int32_t ch;	272 int32_t ch;

275 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);	273 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);

276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

277 if (ch == -1) {	275 if (ch == -1) {

278 return false; // invalid input	276 return false; // Invalid input.

279 }	277 }

280 if (is_supplementary) {	278 if (is_supplementary) {

281 Utf16::Encode(ch, &dst[j]);	279 Utf16::Encode(ch, &dst[j]);

282 j = j + 1;	280 j = j + 1;

283 } else {	281 } else {

284 dst[j] = ch;	282 dst[j] = ch;

285 }	283 }

286 }	284 }

287 if ((i < array_len) && (j == len)) {	285 if ((i < array_len) && (j == len)) {

288 return false; // output overflow	286 return false; // Output overflow.

289 }	287 }

290 return true; // success	288 return true; // Success.

291 }	289 }

292	290

293	291

294 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,	292 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,

295 intptr_t array_len,	293 intptr_t array_len,

296 int32_t* dst,	294 int32_t* dst,

297 intptr_t len) {	295 intptr_t len) {

298 intptr_t i = 0;	296 intptr_t i = 0;

299 intptr_t j = 0;	297 intptr_t j = 0;

300 intptr_t num_bytes;	298 intptr_t num_bytes;

301 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	299 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

302 int32_t ch;	300 int32_t ch;

303 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	301 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

304 if (ch == -1) {	302 if (ch == -1) {

305 return false; // invalid input	303 return false; // Invalid input.

306 }	304 }

307 dst[j] = ch;	305 dst[j] = ch;

308 }	306 }

309 if ((i < array_len) && (j == len)) {	307 if ((i < array_len) && (j == len)) {

310 return false; // output overflow	308 return false; // Output overflow.

311 }	309 }

312 return true; // success	310 return true; // Success.

313 }	311 }

314	312

315	313

316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {	314 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {

317 ASSERT(codepoint > kMaxBmpCodepoint);	315 ASSERT(codepoint > kMaxBmpCodepoint);

318 ASSERT(dst != NULL);	316 ASSERT(dst != NULL);

319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));	317 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));

320 dst[1] = (0xDC00 + (codepoint & 0x3FF));	318 dst[1] = (0xDC00 + (codepoint & 0x3FF));

321 }	319 }

322	320

	321

	322 bool Utf16::CodePointIterator::Next() {

	323 ASSERT(index_ >= -1);

	324 ASSERT(index_ < array_len_);

	325 int d = Length(ch_);

	326 if (index_ == (array_len_ - d)) {

	327 return false;

	328 }

	329 index_ += d;

	330 ch_ = utf16_array_[index_];

	331 if (IsLeadSurrogate(ch_) && (index_ != (array_len_ - 1))) {

	332 int32_t ch2 = utf16_array_[index_ + 1];

	333 if (IsTrailSurrogate(ch2)) {

	334 ch_ = Decode(ch_, ch2);

	335 }

	336 }

	337 return true;

	338 }

	339

323 } // namespace dart	340 } // namespace dart

OLD	NEW

« runtime/vm/dart_api_message.cc ('K') | « runtime/vm/unicode.h ('k') | runtime/vm/unicode_test.cc » ('j') | no next file with comments »