OLD | NEW |
1 // Copyright 2007-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 return -1; | 187 return -1; |
188 } | 188 } |
189 } else { | 189 } else { |
190 return 0; | 190 return 0; |
191 } | 191 } |
192 } | 192 } |
193 | 193 |
194 uchar Utf8::CalculateValue(const byte* str, | 194 uchar Utf8::CalculateValue(const byte* str, |
195 unsigned length, | 195 unsigned length, |
196 unsigned* cursor) { | 196 unsigned* cursor) { |
197 static const uchar kMaxOneByteChar = 0x7F; | |
198 static const uchar kMaxTwoByteChar = 0x7FF; | |
199 static const uchar kMaxThreeByteChar = 0xFFFF; | |
200 static const uchar kMaxFourByteChar = 0x1FFFFF; | |
201 | |
202 // We only get called for non-ascii characters. | 197 // We only get called for non-ascii characters. |
203 if (length == 1) { | 198 if (length == 1) { |
204 *cursor += 1; | 199 *cursor += 1; |
205 return kBadChar; | 200 return kBadChar; |
206 } | 201 } |
207 int first = str[0]; | 202 byte first = str[0]; |
208 int second = str[1] ^ 0x80; | 203 byte second = str[1] ^ 0x80; |
209 if (second & 0xC0) { | 204 if (second & 0xC0) { |
210 *cursor += 1; | 205 *cursor += 1; |
211 return kBadChar; | 206 return kBadChar; |
212 } | 207 } |
213 if (first < 0xE0) { | 208 if (first < 0xE0) { |
214 if (first < 0xC0) { | 209 if (first < 0xC0) { |
215 *cursor += 1; | 210 *cursor += 1; |
216 return kBadChar; | 211 return kBadChar; |
217 } | 212 } |
218 uchar l = ((first << 6) | second) & kMaxTwoByteChar; | 213 uchar l = ((first << 6) | second) & kMaxTwoByteChar; |
219 if (l <= kMaxOneByteChar) { | 214 if (l <= kMaxOneByteChar) { |
220 *cursor += 1; | 215 *cursor += 1; |
221 return kBadChar; | 216 return kBadChar; |
222 } | 217 } |
223 *cursor += 2; | 218 *cursor += 2; |
224 return l; | 219 return l; |
225 } | 220 } |
226 if (length == 2) { | 221 if (length == 2) { |
227 *cursor += 1; | 222 *cursor += 1; |
228 return kBadChar; | 223 return kBadChar; |
229 } | 224 } |
230 int third = str[2] ^ 0x80; | 225 byte third = str[2] ^ 0x80; |
231 if (third & 0xC0) { | 226 if (third & 0xC0) { |
232 *cursor += 1; | 227 *cursor += 1; |
233 return kBadChar; | 228 return kBadChar; |
234 } | 229 } |
235 if (first < 0xF0) { | 230 if (first < 0xF0) { |
236 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; | 231 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; |
237 if (l <= kMaxTwoByteChar) { | 232 if (l <= kMaxTwoByteChar) { |
238 *cursor += 1; | 233 *cursor += 1; |
239 return kBadChar; | 234 return kBadChar; |
240 } | 235 } |
241 *cursor += 3; | 236 *cursor += 3; |
242 return l; | 237 return l; |
243 } | 238 } |
244 if (length == 3) { | 239 if (length == 3) { |
245 *cursor += 1; | 240 *cursor += 1; |
246 return kBadChar; | 241 return kBadChar; |
247 } | 242 } |
248 int fourth = str[3] ^ 0x80; | 243 byte fourth = str[3] ^ 0x80; |
249 if (fourth & 0xC0) { | 244 if (fourth & 0xC0) { |
250 *cursor += 1; | 245 *cursor += 1; |
251 return kBadChar; | 246 return kBadChar; |
252 } | 247 } |
253 if (first < 0xF8) { | 248 if (first < 0xF8) { |
254 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & | 249 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & |
255 kMaxFourByteChar; | 250 kMaxFourByteChar; |
256 if (l <= kMaxThreeByteChar) { | 251 if (l <= kMaxThreeByteChar) { |
257 *cursor += 1; | 252 *cursor += 1; |
258 return kBadChar; | 253 return kBadChar; |
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
745 } | 740 } |
746 | 741 |
747 | 742 |
748 uchar UnicodeData::kMaxCodePoint = 1114109; | 743 uchar UnicodeData::kMaxCodePoint = 1114109; |
749 | 744 |
750 int UnicodeData::GetByteCount() { | 745 int UnicodeData::GetByteCount() { |
751 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUp
percaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_
t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof
(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) +
(sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Si
ze) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2
Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTabl
e4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTab
le0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberT
able1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumbe
rTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) *
kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (si
zeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkT
able2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) *
kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Siz
e) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) *
kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(
uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size
) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUpperca
seTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(ui
nt16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Canonica
lizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeo
f(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Un
CanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size)
+ (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT | 746 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUp
percaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_
t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof
(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) +
(sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Si
ze) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2
Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTabl
e4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTab
le0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberT
able1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumbe
rTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) *
kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (si
zeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkT
able2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) *
kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Siz
e) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) *
kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(
uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size
) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUpperca
seTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(ui
nt16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Canonica
lizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeo
f(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Un
CanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size)
+ (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT |
752 } | 747 } |
753 | 748 |
754 } // namespace unicode | 749 } // namespace unicode |
OLD | NEW |