Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: src/unicode.cc

Issue 155414: Small cleanup to Utf8::CalculateValue: (Closed)
Patch Set: Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2007-2008 the V8 project authors. All rights reserved. 1 // Copyright 2007-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
187 return -1; 187 return -1;
188 } 188 }
189 } else { 189 } else {
190 return 0; 190 return 0;
191 } 191 }
192 } 192 }
193 193
194 uchar Utf8::CalculateValue(const byte* str, 194 uchar Utf8::CalculateValue(const byte* str,
195 unsigned length, 195 unsigned length,
196 unsigned* cursor) { 196 unsigned* cursor) {
197 static const uchar kMaxOneByteChar = 0x7F;
198 static const uchar kMaxTwoByteChar = 0x7FF;
199 static const uchar kMaxThreeByteChar = 0xFFFF;
200 static const uchar kMaxFourByteChar = 0x1FFFFF;
201
202 // We only get called for non-ascii characters. 197 // We only get called for non-ascii characters.
203 if (length == 1) { 198 if (length == 1) {
204 *cursor += 1; 199 *cursor += 1;
205 return kBadChar; 200 return kBadChar;
206 } 201 }
207 int first = str[0]; 202 byte first = str[0];
208 int second = str[1] ^ 0x80; 203 byte second = str[1] ^ 0x80;
209 if (second & 0xC0) { 204 if (second & 0xC0) {
210 *cursor += 1; 205 *cursor += 1;
211 return kBadChar; 206 return kBadChar;
212 } 207 }
213 if (first < 0xE0) { 208 if (first < 0xE0) {
214 if (first < 0xC0) { 209 if (first < 0xC0) {
215 *cursor += 1; 210 *cursor += 1;
216 return kBadChar; 211 return kBadChar;
217 } 212 }
218 uchar l = ((first << 6) | second) & kMaxTwoByteChar; 213 uchar l = ((first << 6) | second) & kMaxTwoByteChar;
219 if (l <= kMaxOneByteChar) { 214 if (l <= kMaxOneByteChar) {
220 *cursor += 1; 215 *cursor += 1;
221 return kBadChar; 216 return kBadChar;
222 } 217 }
223 *cursor += 2; 218 *cursor += 2;
224 return l; 219 return l;
225 } 220 }
226 if (length == 2) { 221 if (length == 2) {
227 *cursor += 1; 222 *cursor += 1;
228 return kBadChar; 223 return kBadChar;
229 } 224 }
230 int third = str[2] ^ 0x80; 225 byte third = str[2] ^ 0x80;
231 if (third & 0xC0) { 226 if (third & 0xC0) {
232 *cursor += 1; 227 *cursor += 1;
233 return kBadChar; 228 return kBadChar;
234 } 229 }
235 if (first < 0xF0) { 230 if (first < 0xF0) {
236 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar; 231 uchar l = ((((first << 6) | second) << 6) | third) & kMaxThreeByteChar;
237 if (l <= kMaxTwoByteChar) { 232 if (l <= kMaxTwoByteChar) {
238 *cursor += 1; 233 *cursor += 1;
239 return kBadChar; 234 return kBadChar;
240 } 235 }
241 *cursor += 3; 236 *cursor += 3;
242 return l; 237 return l;
243 } 238 }
244 if (length == 3) { 239 if (length == 3) {
245 *cursor += 1; 240 *cursor += 1;
246 return kBadChar; 241 return kBadChar;
247 } 242 }
248 int fourth = str[3] ^ 0x80; 243 byte fourth = str[3] ^ 0x80;
249 if (fourth & 0xC0) { 244 if (fourth & 0xC0) {
250 *cursor += 1; 245 *cursor += 1;
251 return kBadChar; 246 return kBadChar;
252 } 247 }
253 if (first < 0xF8) { 248 if (first < 0xF8) {
254 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) & 249 uchar l = (((((first << 6 | second) << 6) | third) << 6) | fourth) &
255 kMaxFourByteChar; 250 kMaxFourByteChar;
256 if (l <= kMaxThreeByteChar) { 251 if (l <= kMaxThreeByteChar) {
257 *cursor += 1; 252 *cursor += 1;
258 return kBadChar; 253 return kBadChar;
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after
745 } 740 }
746 741
747 742
748 uchar UnicodeData::kMaxCodePoint = 1114109; 743 uchar UnicodeData::kMaxCodePoint = 1114109;
749 744
750 int UnicodeData::GetByteCount() { 745 int UnicodeData::GetByteCount() {
751 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUppercaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) + (sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Size) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTable4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTable0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberTable1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumberTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) * kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkTable2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) * kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Size) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) * kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUppercaseTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT 746 return 0 + (sizeof(uint16_t) * kUppercaseTable0Size) + (sizeof(uint16_t) * kUppercaseTable1Size) + (sizeof(uint16_t) * kUppercaseTable2Size) + (sizeof(uint16_t) * kUppercaseTable3Size) + (sizeof(uint16_t) * kLowercaseTable0Size) + (sizeof(uint16_t) * kLowercaseTable1Size) + (sizeof(uint16_t) * kLowercaseTable2Size) + (sizeof(uint16_t) * kLowercaseTable3Size) + (sizeof(uint16_t) * kLetterTable0Size) + (sizeof(uint16_t) * kLetterTable1Size) + (sizeof(uint16_t) * kLetterTable2Size) + (sizeof(uint16_t) * kLetterTable3Size) + (sizeof(uint16_t) * kLetterTable4Size) + (sizeof(uint16_t) * kLetterTable5Size) + (sizeof(uint16_t) * kSpaceTable0Size) + (sizeof(uint16_t) * kNumberTable0Size) + (sizeof(uint16_t) * kNumberTable1Size) + (sizeof(uint16_t) * kNumberTable2Size) + (sizeof(uint16_t) * kNumberTable3Size) + (sizeof(uint16_t) * kWhiteSpaceTable0Size) + (sizeof(uint16_t) * kLineTerminatorTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable0Size) + (sizeof(uint16_t) * kCombiningMarkTable1Size) + (sizeof(uint16_t) * kCombiningMarkTable2Size) + (sizeof(uint16_t) * kCombiningMarkTable3Size) + (sizeof(uint16_t) * kCombiningMarkTable28Size) + (sizeof(uint16_t) * kConnectorPunctuationTable0Size) + (sizeof(uint16_t) * kConnectorPunctuationTable1Size) + (sizeof(uint16_t) * kToLowercaseTable0Size) + (sizeof(uint16_t) * kToLowercaseTable1Size) + (sizeof(uint16_t) * kToLowercaseTable2Size) + (sizeof(uint16_t) * kToUppercaseTable0Size) + (sizeof(uint16_t) * kToUppercaseTable1Size) + (sizeof(uint16_t) * kToUppercaseTable2Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable0Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262CanonicalizeTable2Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable0Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable1Size) + (sizeof(uint16_t) * kEcma262UnCanonicalizeTable2Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable0Size) + (sizeof(uint16_t) * kCanonicalizationRangeTable1Size); // NOLINT
752 } 747 }
753 748
754 } // namespace unicode 749 } // namespace unicode
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698