| OLD | NEW |
| 1 // Copyright 2007-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 187 return -1; | 187 return -1; |
| 188 } | 188 } |
| 189 } else { | 189 } else { |
| 190 return 0; | 190 return 0; |
| 191 } | 191 } |
| 192 } | 192 } |
| 193 | 193 |
| 194 uchar Utf8::CalculateValue(const byte* str, | 194 uchar Utf8::CalculateValue(const byte* str, |
| 195 unsigned length, | 195 unsigned length, |
| 196 unsigned* cursor) { | 196 unsigned* cursor) { |
| 197 static const uchar kMaxOneByteChar = 0x7F; | |
| 198 static const uchar kMaxTwoByteChar = 0x7FF; | |
| 199 static const uchar kMaxThreeByteChar = 0xFFFF; | |
| 200 static const uchar kMaxFourByteChar = 0x1FFFFF; | |
| 201 | |
| 202 // We only get called for non-ascii characters. | 197 // We only get called for non-ascii characters. |
| 203 if (length == 1) { | 198 if (length == 1) { |
| 204 *cursor += 1; | 199 *cursor += 1; |
| 205 return kBadChar; | 200 return kBadChar; |
| 206 } | 201 } |
| 207 int first = str[0]; | 202 byte first = str[0]; |
| 208 int second = str[1] ^ 0x80; | 203 byte second = str[1] ^ 0x80; |
| 209 if (second & 0xC0) { | 204 if (second & 0xC0) { |
| 210 *cursor += 1; | 205 *cursor += 1; |
| 211 return kBadChar; | 206 return kBadChar; |
| 212 } | 207 } |
| 213 if (first < 0xE0) { | 208 if (first < 0xE0) { |
| 214 if (first < 0xC0) { | 209 if (first < 0xC0) { |
| 215 *cursor += 1; | 210 *cursor += 1; |
| 216 return kBadChar; | 211 return kBadChar; |
| 217 } | 212 } |
| 218 uchar l = ((first << 6) | second) & kMaxTwoByteChar; | 213 uchar l = ((first << 6) | second) & kMaxTwoByteChar; |
| 219 if (l <= kMaxOneByteChar) { | 214 if (l <= kMaxOneByteChar) { |
| 220 *cursor += 1; | 215 *cursor += 1; |
| 221 return kBadChar; | 216 return kBadChar; |
| 222 } | 217 } |
| 223 *cursor += 2; | 218 *cursor += 2; |
| 224 return l; | 219 return l; |
| 225 } | 220 } |
| 226 if (length == 2) { | 221 if (length == 2) { |
| 227 *cursor += 1; | 222 *cursor += 1; |
| 228 return kBadChar; | 223 return kBadChar; |
| 229 } | 224 } |
| 230 int third = str[2] ^ 0x80; | 225 byte third = str[2] ^ 0x80; |
| 231 if (third & 0xC0) { | 226 if (third & 0xC0) { |
| 232 *cursor += 1; | 227 *cursor += 1; |
| 233 return kBadChar; | 228 return kBadChar; |
| 234 } | 229 } |
| 235 if (first < 0xF0) { | 230 if (first < 0xF0) { |
| 236 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; | 231 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; |
| 237 if (l <= kMaxTwoByteChar) { | 232 if (l <= kMaxTwoByteChar) { |
| 238 *cursor += 1; | 233 *cursor += 1; |
| 239 return kBadChar; | 234 return kBadChar; |
| 240 } | 235 } |
| 241 *cursor += 3; | 236 *cursor += 3; |
| 242 return l; | 237 return l; |
| 243 } | 238 } |
| 244 if (length == 3) { | 239 if (length == 3) { |
| 245 *cursor += 1; | 240 *cursor += 1; |
| 246 return kBadChar; | 241 return kBadChar; |
| 247 } | 242 } |
| 248 int fourth = str[3] ^ 0x80; | 243 byte fourth = str[3] ^ 0x80; |
| 249 if (fourth & 0xC0) { | 244 if (fourth & 0xC0) { |
| 250 *cursor += 1; | 245 *cursor += 1; |
| 251 return kBadChar; | 246 return kBadChar; |
| 252 } | 247 } |
| 253 if (first < 0xF8) { | 248 if (first < 0xF8) { |
| 254 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & | 249 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & |
| 255 kMaxFourByteChar; | 250 kMaxFourByteChar; |
| 256 if (l <= kMaxThreeByteChar) { | 251 if (l <= kMaxThreeByteChar) { |
| 257 *cursor += 1; | 252 *cursor += 1; |
| 258 return kBadChar; | 253 return kBadChar; |
| (...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 745 } | 740 } |
| 746 | 741 |
| 747 | 742 |
| 748 uchar UnicodeData::kMaxCodePoint = 1114109; | 743 uchar UnicodeData::kMaxCodePoint = 1114109; |
| 749 | 744 |
| 750 int UnicodeData::GetByteCount() { | 745 int UnicodeData::GetByteCount() { |
| 751 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUp
percaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_
t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof
(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) +
(sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Si
ze) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2
Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTabl
e4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTab
le0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberT
able1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumbe
rTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) *
kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (si
zeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkT
able2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) *
kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Siz
e) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) *
kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(
uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size
) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUpperca
seTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(ui
nt16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Canonica
lizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeo
f(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Un
CanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size)
+ (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT | 746 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUp
percaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_
t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof
(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) +
(sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Si
ze) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2
Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTabl
e4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTab
le0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberT
able1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumbe
rTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) *
kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (si
zeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkT
able2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) *
kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Siz
e) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) *
kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(
uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size
) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUpperca
seTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(ui
nt16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Canonica
lizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeo
f(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262Un
CanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size)
+ (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT |
| 752 } | 747 } |
| 753 | 748 |
| 754 } // namespace unicode | 749 } // namespace unicode |
| OLD | NEW |