runtime/vm/unicode.cc - Issue 2974233002: VM: Re-format to use at most one newline between functions

Side by Side Diff: runtime/vm/unicode.cc

Issue 2974233002: VM: Re-format to use at most one newline between functions (Closed)

Patch Set: Rebase and merge (created 3 years, 5 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/unicode.h"	5 #include "vm/unicode.h"

6	6

7 #include "vm/allocation.h"	7 #include "vm/allocation.h"

8 #include "vm/globals.h"	8 #include "vm/globals.h"

9 #include "vm/object.h"	9 #include "vm/object.h"

10	10

(...skipping 17 matching lines...) Expand all Loading...
28 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	28 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

29 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	29 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

30 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0	30 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0

31 };	31 };

32 // clang-format on	32 // clang-format on

33	33

34 const uint32_t Utf8::kMagicBits[7] = {0, // Padding.	34 const uint32_t Utf8::kMagicBits[7] = {0, // Padding.

35 0x00000000, 0x00003080, 0x000E2080,	35 0x00000000, 0x00003080, 0x000E2080,

36 0x03C82080, 0xFA082080, 0x82082080};	36 0x03C82080, 0xFA082080, 0x82082080};

37	37

38

39 // Minimum values of code points used to check shortest form.	38 // Minimum values of code points used to check shortest form.

40 const uint32_t Utf8::kOverlongMinimum[7] = {0, // Padding.	39 const uint32_t Utf8::kOverlongMinimum[7] = {0, // Padding.

41 0x0, 0x80, 0x800,	40 0x0, 0x80, 0x800,

42 0x10000, 0xFFFFFFFF, 0xFFFFFFFF};	41 0x10000, 0xFFFFFFFF, 0xFFFFFFFF};

43	42

44

45 // Returns the most restricted coding form in which the sequence of utf8	43 // Returns the most restricted coding form in which the sequence of utf8

46 // characters in 'utf8_array' can be represented in, and the number of	44 // characters in 'utf8_array' can be represented in, and the number of

47 // code units needed in that form.	45 // code units needed in that form.

48 intptr_t Utf8::CodeUnitCount(const uint8_t* utf8_array,	46 intptr_t Utf8::CodeUnitCount(const uint8_t* utf8_array,

49 intptr_t array_len,	47 intptr_t array_len,

50 Type* type) {	48 Type* type) {

51 intptr_t len = 0;	49 intptr_t len = 0;

52 Type char_type = kLatin1;	50 Type char_type = kLatin1;

53 for (intptr_t i = 0; i < array_len; i++) {	51 for (intptr_t i = 0; i < array_len; i++) {

54 uint8_t code_unit = utf8_array[i];	52 uint8_t code_unit = utf8_array[i];

55 if (!IsTrailByte(code_unit)) {	53 if (!IsTrailByte(code_unit)) {

56 ++len;	54 ++len;

57 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF	55 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF

58 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000	56 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000

59 char_type = kSupplementary;	57 char_type = kSupplementary;

60 ++len;	58 ++len;

61 } else if (char_type == kLatin1) {	59 } else if (char_type == kLatin1) {

62 char_type = kBMP;	60 char_type = kBMP;

63 }	61 }

64 }	62 }

65 }	63 }

66 }	64 }

67 *type = char_type;	65 *type = char_type;

68 return len;	66 return len;

69 }	67 }

70	68

71

72 // Returns true if str is a valid NUL-terminated UTF-8 string.	69 // Returns true if str is a valid NUL-terminated UTF-8 string.

73 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {	70 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

74 intptr_t i = 0;	71 intptr_t i = 0;

75 while (i < array_len) {	72 while (i < array_len) {

76 uint32_t ch = utf8_array[i] & 0xFF;	73 uint32_t ch = utf8_array[i] & 0xFF;

77 intptr_t j = 1;	74 intptr_t j = 1;

78 if (ch >= 0x80) {	75 if (ch >= 0x80) {

79 int8_t num_trail_bytes = kTrailBytes[ch];	76 int8_t num_trail_bytes = kTrailBytes[ch];

80 bool is_malformed = false;	77 bool is_malformed = false;

81 for (; j < num_trail_bytes; ++j) {	78 for (; j < num_trail_bytes; ++j) {

82 if ((i + j) < array_len) {	79 if ((i + j) < array_len) {

83 uint8_t code_unit = utf8_array[i + j];	80 uint8_t code_unit = utf8_array[i + j];

84 is_malformed |= !IsTrailByte(code_unit);	81 is_malformed |= !IsTrailByte(code_unit);

85 ch = (ch << 6) + code_unit;	82 ch = (ch << 6) + code_unit;

86 } else {	83 } else {

87 return false;	84 return false;

88 }	85 }

89 }	86 }

90 ch -= kMagicBits[num_trail_bytes];	87 ch -= kMagicBits[num_trail_bytes];

91 if (!((is_malformed == false) && (j == num_trail_bytes) &&	88 if (!((is_malformed == false) && (j == num_trail_bytes) &&

92 !Utf::IsOutOfRange(ch) && !IsNonShortestForm(ch, j))) {	89 !Utf::IsOutOfRange(ch) && !IsNonShortestForm(ch, j))) {

93 return false;	90 return false;

94 }	91 }

95 }	92 }

96 i += j;	93 i += j;

97 }	94 }

98 return true;	95 return true;

99 }	96 }

100	97

101

102 intptr_t Utf8::Length(int32_t ch) {	98 intptr_t Utf8::Length(int32_t ch) {

103 if (ch <= kMaxOneByteChar) {	99 if (ch <= kMaxOneByteChar) {

104 return 1;	100 return 1;

105 } else if (ch <= kMaxTwoByteChar) {	101 } else if (ch <= kMaxTwoByteChar) {

106 return 2;	102 return 2;

107 } else if (ch <= kMaxThreeByteChar) {	103 } else if (ch <= kMaxThreeByteChar) {

108 return 3;	104 return 3;

109 }	105 }

110 ASSERT(ch <= kMaxFourByteChar);	106 ASSERT(ch <= kMaxFourByteChar);

111 return 4;	107 return 4;

112 }	108 }

113	109

114

115 intptr_t Utf8::Length(const String& str) {	110 intptr_t Utf8::Length(const String& str) {

116 intptr_t length = 0;	111 intptr_t length = 0;

117 String::CodePointIterator it(str);	112 String::CodePointIterator it(str);

118 while (it.Next()) {	113 while (it.Next()) {

119 int32_t ch = it.Current();	114 int32_t ch = it.Current();

120 length += Utf8::Length(ch);	115 length += Utf8::Length(ch);

121 }	116 }

122 return length;	117 return length;

123 }	118 }

124	119

125

126 intptr_t Utf8::Encode(int32_t ch, char* dst) {	120 intptr_t Utf8::Encode(int32_t ch, char* dst) {

127 static const int kMask = ~(1 << 6);	121 static const int kMask = ~(1 << 6);

128 if (ch <= kMaxOneByteChar) {	122 if (ch <= kMaxOneByteChar) {

129 dst[0] = ch;	123 dst[0] = ch;

130 return 1;	124 return 1;

131 }	125 }

132 if (ch <= kMaxTwoByteChar) {	126 if (ch <= kMaxTwoByteChar) {

133 dst[0] = 0xC0 | (ch >> 6);	127 dst[0] = 0xC0 | (ch >> 6);

134 dst[1] = 0x80 | (ch & kMask);	128 dst[1] = 0x80 | (ch & kMask);

135 return 2;	129 return 2;

136 }	130 }

137 if (ch <= kMaxThreeByteChar) {	131 if (ch <= kMaxThreeByteChar) {

138 dst[0] = 0xE0 | (ch >> 12);	132 dst[0] = 0xE0 | (ch >> 12);

139 dst[1] = 0x80 | ((ch >> 6) & kMask);	133 dst[1] = 0x80 | ((ch >> 6) & kMask);

140 dst[2] = 0x80 | (ch & kMask);	134 dst[2] = 0x80 | (ch & kMask);

141 return 3;	135 return 3;

142 }	136 }

143 ASSERT(ch <= kMaxFourByteChar);	137 ASSERT(ch <= kMaxFourByteChar);

144 dst[0] = 0xF0 | (ch >> 18);	138 dst[0] = 0xF0 | (ch >> 18);

145 dst[1] = 0x80 | ((ch >> 12) & kMask);	139 dst[1] = 0x80 | ((ch >> 12) & kMask);

146 dst[2] = 0x80 | ((ch >> 6) & kMask);	140 dst[2] = 0x80 | ((ch >> 6) & kMask);

147 dst[3] = 0x80 | (ch & kMask);	141 dst[3] = 0x80 | (ch & kMask);

148 return 4;	142 return 4;

149 }	143 }

150	144

151

152 intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) {	145 intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) {

153 intptr_t pos = 0;	146 intptr_t pos = 0;

154 String::CodePointIterator it(src);	147 String::CodePointIterator it(src);

155 while (it.Next()) {	148 while (it.Next()) {

156 int32_t ch = it.Current();	149 int32_t ch = it.Current();

157 intptr_t num_bytes = Utf8::Length(ch);	150 intptr_t num_bytes = Utf8::Length(ch);

158 if (pos + num_bytes > len) {	151 if (pos + num_bytes > len) {

159 break;	152 break;

160 }	153 }

161 Utf8::Encode(ch, &dst[pos]);	154 Utf8::Encode(ch, &dst[pos]);

162 pos += num_bytes;	155 pos += num_bytes;

163 }	156 }

164 return pos;	157 return pos;

165 }	158 }

166	159

167

168 intptr_t Utf8::Decode(const uint8_t* utf8_array,	160 intptr_t Utf8::Decode(const uint8_t* utf8_array,

169 intptr_t array_len,	161 intptr_t array_len,

170 int32_t* dst) {	162 int32_t* dst) {

171 uint32_t ch = utf8_array[0] & 0xFF;	163 uint32_t ch = utf8_array[0] & 0xFF;

172 intptr_t i = 1;	164 intptr_t i = 1;

173 if (ch >= 0x80) {	165 if (ch >= 0x80) {

174 intptr_t num_trail_bytes = kTrailBytes[ch];	166 intptr_t num_trail_bytes = kTrailBytes[ch];

175 bool is_malformed = false;	167 bool is_malformed = false;

176 for (; i < num_trail_bytes; ++i) {	168 for (; i < num_trail_bytes; ++i) {

177 if (i < array_len) {	169 if (i < array_len) {

178 uint8_t code_unit = utf8_array[i];	170 uint8_t code_unit = utf8_array[i];

179 is_malformed |= !IsTrailByte(code_unit);	171 is_malformed |= !IsTrailByte(code_unit);

180 ch = (ch << 6) + code_unit;	172 ch = (ch << 6) + code_unit;

181 } else {	173 } else {

182 *dst = -1;	174 *dst = -1;

183 return 0;	175 return 0;

184 }	176 }

185 }	177 }

186 ch -= kMagicBits[num_trail_bytes];	178 ch -= kMagicBits[num_trail_bytes];

187 if (!((is_malformed == false) && (i == num_trail_bytes) &&	179 if (!((is_malformed == false) && (i == num_trail_bytes) &&

188 !Utf::IsOutOfRange(ch) && !IsNonShortestForm(ch, i))) {	180 !Utf::IsOutOfRange(ch) && !IsNonShortestForm(ch, i))) {

189 *dst = -1;	181 *dst = -1;

190 return 0;	182 return 0;

191 }	183 }

192 }	184 }

193 *dst = ch;	185 *dst = ch;

194 return i;	186 return i;

195 }	187 }

196	188

197

198 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,	189 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,

199 intptr_t array_len,	190 intptr_t array_len,

200 uint8_t* dst,	191 uint8_t* dst,

201 intptr_t len) {	192 intptr_t len) {

202 intptr_t i = 0;	193 intptr_t i = 0;

203 intptr_t j = 0;	194 intptr_t j = 0;

204 intptr_t num_bytes;	195 intptr_t num_bytes;

205 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	196 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

206 int32_t ch;	197 int32_t ch;

207 ASSERT(IsLatin1SequenceStart(utf8_array[i]));	198 ASSERT(IsLatin1SequenceStart(utf8_array[i]));

208 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	199 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

209 if (ch == -1) {	200 if (ch == -1) {

210 return false; // Invalid input.	201 return false; // Invalid input.

211 }	202 }

212 ASSERT(Utf::IsLatin1(ch));	203 ASSERT(Utf::IsLatin1(ch));

213 dst[j] = ch;	204 dst[j] = ch;

214 }	205 }

215 if ((i < array_len) && (j == len)) {	206 if ((i < array_len) && (j == len)) {

216 return false; // Output overflow.	207 return false; // Output overflow.

217 }	208 }

218 return true; // Success.	209 return true; // Success.

219 }	210 }

220	211

221

222 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,	212 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

223 intptr_t array_len,	213 intptr_t array_len,

224 uint16_t* dst,	214 uint16_t* dst,

225 intptr_t len) {	215 intptr_t len) {

226 intptr_t i = 0;	216 intptr_t i = 0;

227 intptr_t j = 0;	217 intptr_t j = 0;

228 intptr_t num_bytes;	218 intptr_t num_bytes;

229 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	219 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

230 int32_t ch;	220 int32_t ch;

231 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);	221 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);

232 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	222 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

233 if (ch == -1) {	223 if (ch == -1) {

234 return false; // Invalid input.	224 return false; // Invalid input.

235 }	225 }

236 if (is_supplementary) {	226 if (is_supplementary) {

237 Utf16::Encode(ch, &dst[j]);	227 Utf16::Encode(ch, &dst[j]);

238 j = j + 1;	228 j = j + 1;

239 } else {	229 } else {

240 dst[j] = ch;	230 dst[j] = ch;

241 }	231 }

242 }	232 }

243 if ((i < array_len) && (j == len)) {	233 if ((i < array_len) && (j == len)) {

244 return false; // Output overflow.	234 return false; // Output overflow.

245 }	235 }

246 return true; // Success.	236 return true; // Success.

247 }	237 }

248	238

249

250 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,	239 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,

251 intptr_t array_len,	240 intptr_t array_len,

252 int32_t* dst,	241 int32_t* dst,

253 intptr_t len) {	242 intptr_t len) {

254 intptr_t i = 0;	243 intptr_t i = 0;

255 intptr_t j = 0;	244 intptr_t j = 0;

256 intptr_t num_bytes;	245 intptr_t num_bytes;

257 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {	246 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

258 int32_t ch;	247 int32_t ch;

259 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);	248 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

260 if (ch == -1) {	249 if (ch == -1) {

261 return false; // Invalid input.	250 return false; // Invalid input.

262 }	251 }

263 dst[j] = ch;	252 dst[j] = ch;

264 }	253 }

265 if ((i < array_len) && (j == len)) {	254 if ((i < array_len) && (j == len)) {

266 return false; // Output overflow.	255 return false; // Output overflow.

267 }	256 }

268 return true; // Success.	257 return true; // Success.

269 }	258 }

270	259

271

272 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) {	260 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) {

273 ASSERT(str != NULL);	261 ASSERT(str != NULL);

274 intptr_t array_len = strlen(str);	262 intptr_t array_len = strlen(str);

275 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);	263 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);

276 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len);	264 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len);

277 }	265 }

278	266

279

280 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {	267 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {

281 ASSERT(codepoint > Utf16::kMaxCodeUnit);	268 ASSERT(codepoint > Utf16::kMaxCodeUnit);

282 ASSERT(dst != NULL);	269 ASSERT(dst != NULL);

283 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));	270 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));

284 dst[1] = (0xDC00 + (codepoint & 0x3FF));	271 dst[1] = (0xDC00 + (codepoint & 0x3FF));

285 }	272 }

286	273

287 } // namespace dart	274 } // namespace dart

OLD	NEW

« no previous file with comments | « runtime/vm/unicode.h ('k') | runtime/vm/unicode_test.cc » ('j') | no next file with comments »