Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 71 } else { | 71 } else { |
| 72 entries_[c & kMask] = CacheEntry(c, 0); | 72 entries_[c & kMask] = CacheEntry(c, 0); |
| 73 return 0; | 73 return 0; |
| 74 } | 74 } |
| 75 } else { | 75 } else { |
| 76 return length; | 76 return length; |
| 77 } | 77 } |
| 78 } | 78 } |
| 79 | 79 |
| 80 | 80 |
| 81 unsigned Utf8::Encode(char* str, uchar c) { | 81 unsigned Utf8::Encode(char* str, uchar c, int previous) { |
| 82 static const int kMask = ~(1 << 6); | 82 static const int kMask = ~(1 << 6); |
| 83 if (c <= kMaxOneByteChar) { | 83 if (c <= kMaxOneByteChar) { |
| 84 str[0] = c; | 84 str[0] = c; |
| 85 return 1; | 85 return 1; |
| 86 } else if (c <= kMaxTwoByteChar) { | 86 } else if (c <= kMaxTwoByteChar) { |
| 87 str[0] = 0xC0 | (c >> 6); | 87 str[0] = 0xC0 | (c >> 6); |
| 88 str[1] = 0x80 | (c & kMask); | 88 str[1] = 0x80 | (c & kMask); |
| 89 return 2; | 89 return 2; |
| 90 } else if (c <= kMaxThreeByteChar) { | 90 } else if (c <= kMaxThreeByteChar) { |
| 91 if (Utf16::IsTrailSurrogate(c) && | |
| 92 previous != kNoPreviousCharacter && | |
|
rossberg
2012/03/07 13:32:47
Isn't that implied by Utf16::IsLeadSurrogate(previous)?
Erik Corry
2012/03/11 19:29:22
No, but it should be. Fixed.
| |
| 93 Utf16::IsLeadSurrogate(previous)) { | |
| 94 return Encode(str - 3, | |
| 95 Utf16::CombineSurrogatePair(previous, c), | |
| 96 Utf8::kNoPreviousCharacter) - 3; | |
|
rossberg
2012/03/07 13:32:47
kSizeOfUnmatchedSurrogate instead of 3, perhaps (he… [remainder of comment truncated in the review listing]
Erik Corry
2012/03/11 19:29:22
Done.
| |
| 97 } | |
| 91 str[0] = 0xE0 | (c >> 12); | 98 str[0] = 0xE0 | (c >> 12); |
| 92 str[1] = 0x80 | ((c >> 6) & kMask); | 99 str[1] = 0x80 | ((c >> 6) & kMask); |
| 93 str[2] = 0x80 | (c & kMask); | 100 str[2] = 0x80 | (c & kMask); |
| 94 return 3; | 101 return 3; |
| 95 } else { | 102 } else { |
| 96 str[0] = 0xF0 | (c >> 18); | 103 str[0] = 0xF0 | (c >> 18); |
| 97 str[1] = 0x80 | ((c >> 12) & kMask); | 104 str[1] = 0x80 | ((c >> 12) & kMask); |
| 98 str[2] = 0x80 | ((c >> 6) & kMask); | 105 str[2] = 0x80 | ((c >> 6) & kMask); |
| 99 str[3] = 0x80 | (c & kMask); | 106 str[3] = 0x80 | (c & kMask); |
| 100 return 4; | 107 return 4; |
| 101 } | 108 } |
| 102 } | 109 } |
| 103 | 110 |
| 104 | 111 |
| 105 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { | 112 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { |
| 106 if (length <= 0) return kBadChar; | 113 if (length <= 0) return kBadChar; |
| 107 byte first = bytes[0]; | 114 byte first = bytes[0]; |
| 108 // Characters between 0000 and 0007F are encoded as a single character | 115 // Characters between 0000 and 0007F are encoded as a single character |
| 109 if (first <= kMaxOneByteChar) { | 116 if (first <= kMaxOneByteChar) { |
| 110 *cursor += 1; | 117 *cursor += 1; |
| 111 return first; | 118 return first; |
| 112 } | 119 } |
| 113 return CalculateValue(bytes, length, cursor); | 120 return CalculateValue(bytes, length, cursor); |
| 114 } | 121 } |
| 115 | 122 |
| 116 unsigned Utf8::Length(uchar c) { | 123 unsigned Utf8::Length(uchar c, int previous) { |
| 117 if (c <= kMaxOneByteChar) { | 124 if (c <= kMaxOneByteChar) { |
| 118 return 1; | 125 return 1; |
| 119 } else if (c <= kMaxTwoByteChar) { | 126 } else if (c <= kMaxTwoByteChar) { |
| 120 return 2; | 127 return 2; |
| 121 } else if (c <= kMaxThreeByteChar) { | 128 } else if (c <= kMaxThreeByteChar) { |
| 129 if (Utf16::IsTrailSurrogate(c) && | |
| 130 previous != kNoPreviousCharacter && | |
|
rossberg
2012/03/07 13:32:47
See above.
Erik Corry
2012/03/11 19:29:22
Done.
| |
| 131 Utf16::IsLeadSurrogate(previous)) { | |
| 132 return 1; | |
|
rossberg
2012/03/07 13:32:47
This is 4 minus the 3 already counted, I suppose.
Erik Corry
2012/03/11 19:29:22
Fixed
| |
| 133 } | |
| 122 return 3; | 134 return 3; |
| 123 } else { | 135 } else { |
| 124 return 4; | 136 return 4; |
| 125 } | 137 } |
| 126 } | 138 } |
| 127 | 139 |
| 128 uchar CharacterStream::GetNext() { | 140 uchar CharacterStream::GetNext() { |
| 129 uchar result = DecodeCharacter(buffer_, &cursor_); | 141 uchar result = DecodeCharacter(buffer_, &cursor_); |
| 130 if (remaining_ == 1) { | 142 if (remaining_ == 1) { |
| 131 cursor_ = 0; | 143 cursor_ = 0; |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 229 | 241 |
| 230 template <unsigned s> | 242 template <unsigned s> |
| 231 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) | 243 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) |
| 232 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, | 244 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, |
| 233 length)) { | 245 length)) { |
| 234 } | 246 } |
| 235 | 247 |
| 236 } // namespace unibrow | 248 } // namespace unibrow |
| 237 | 249 |
| 238 #endif // V8_UNICODE_INL_H_ | 250 #endif // V8_UNICODE_INL_H_ |
| OLD | NEW |