OLD | NEW |
---|---|
1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 60 matching lines...) | |
71 } else { | 71 } else { |
72 entries_[c & kMask] = CacheEntry(c, 0); | 72 entries_[c & kMask] = CacheEntry(c, 0); |
73 return 0; | 73 return 0; |
74 } | 74 } |
75 } else { | 75 } else { |
76 return length; | 76 return length; |
77 } | 77 } |
78 } | 78 } |
79 | 79 |
80 | 80 |
81 unsigned Utf8::Encode(char* str, uchar c) { | 81 unsigned Utf8::Encode(char* str, uchar c, int previous) { |
82 static const int kMask = ~(1 << 6); | 82 static const int kMask = ~(1 << 6); |
83 if (c <= kMaxOneByteChar) { | 83 if (c <= kMaxOneByteChar) { |
84 str[0] = c; | 84 str[0] = c; |
85 return 1; | 85 return 1; |
86 } else if (c <= kMaxTwoByteChar) { | 86 } else if (c <= kMaxTwoByteChar) { |
87 str[0] = 0xC0 | (c >> 6); | 87 str[0] = 0xC0 | (c >> 6); |
88 str[1] = 0x80 | (c & kMask); | 88 str[1] = 0x80 | (c & kMask); |
89 return 2; | 89 return 2; |
90 } else if (c <= kMaxThreeByteChar) { | 90 } else if (c <= kMaxThreeByteChar) { |
91 if (Utf16::IsTrailSurrogate(c) && | |
92 previous != kNoPreviousCharacter && | |
rossberg
2012/03/07 13:32:47
Isn't that implied by Utf16::IsLeadSurrogate(previous)? [comment preview truncated at 50 characters]
Erik Corry
2012/03/11 19:29:22
No, but it should be. Fixed.
| |
93 Utf16::IsLeadSurrogate(previous)) { | |
94 return Encode(str - 3, | |
95 Utf16::CombineSurrogatePair(previous, c), | |
96 Utf8::kNoPreviousCharacter) - 3; | |
rossberg
2012/03/07 13:32:47
kSizeOfUnmatchedSurrogate instead of 3 perhaps (he… [comment preview truncated at 50 characters]
Erik Corry
2012/03/11 19:29:22
Done.
| |
97 } | |
91 str[0] = 0xE0 | (c >> 12); | 98 str[0] = 0xE0 | (c >> 12); |
92 str[1] = 0x80 | ((c >> 6) & kMask); | 99 str[1] = 0x80 | ((c >> 6) & kMask); |
93 str[2] = 0x80 | (c & kMask); | 100 str[2] = 0x80 | (c & kMask); |
94 return 3; | 101 return 3; |
95 } else { | 102 } else { |
96 str[0] = 0xF0 | (c >> 18); | 103 str[0] = 0xF0 | (c >> 18); |
97 str[1] = 0x80 | ((c >> 12) & kMask); | 104 str[1] = 0x80 | ((c >> 12) & kMask); |
98 str[2] = 0x80 | ((c >> 6) & kMask); | 105 str[2] = 0x80 | ((c >> 6) & kMask); |
99 str[3] = 0x80 | (c & kMask); | 106 str[3] = 0x80 | (c & kMask); |
100 return 4; | 107 return 4; |
101 } | 108 } |
102 } | 109 } |
103 | 110 |
104 | 111 |
105 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { | 112 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { |
106 if (length <= 0) return kBadChar; | 113 if (length <= 0) return kBadChar; |
107 byte first = bytes[0]; | 114 byte first = bytes[0]; |
108 // Characters between 0000 and 0007F are encoded as a single character | 115 // Characters between 0000 and 0007F are encoded as a single character |
109 if (first <= kMaxOneByteChar) { | 116 if (first <= kMaxOneByteChar) { |
110 *cursor += 1; | 117 *cursor += 1; |
111 return first; | 118 return first; |
112 } | 119 } |
113 return CalculateValue(bytes, length, cursor); | 120 return CalculateValue(bytes, length, cursor); |
114 } | 121 } |
115 | 122 |
116 unsigned Utf8::Length(uchar c) { | 123 unsigned Utf8::Length(uchar c, int previous) { |
117 if (c <= kMaxOneByteChar) { | 124 if (c <= kMaxOneByteChar) { |
118 return 1; | 125 return 1; |
119 } else if (c <= kMaxTwoByteChar) { | 126 } else if (c <= kMaxTwoByteChar) { |
120 return 2; | 127 return 2; |
121 } else if (c <= kMaxThreeByteChar) { | 128 } else if (c <= kMaxThreeByteChar) { |
129 if (Utf16::IsTrailSurrogate(c) && | |
130 previous != kNoPreviousCharacter && | |
rossberg
2012/03/07 13:32:47
See above.
Erik Corry
2012/03/11 19:29:22
Done.
| |
131 Utf16::IsLeadSurrogate(previous)) { | |
132 return 1; | |
rossberg
2012/03/07 13:32:47
This is 4 - 3 already counted, I suppose.
Erik Corry
2012/03/11 19:29:22
Fixed
| |
133 } | |
122 return 3; | 134 return 3; |
123 } else { | 135 } else { |
124 return 4; | 136 return 4; |
125 } | 137 } |
126 } | 138 } |
127 | 139 |
128 uchar CharacterStream::GetNext() { | 140 uchar CharacterStream::GetNext() { |
129 uchar result = DecodeCharacter(buffer_, &cursor_); | 141 uchar result = DecodeCharacter(buffer_, &cursor_); |
130 if (remaining_ == 1) { | 142 if (remaining_ == 1) { |
131 cursor_ = 0; | 143 cursor_ = 0; |
(...skipping 97 matching lines...) | |
229 | 241 |
230 template <unsigned s> | 242 template <unsigned s> |
231 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) | 243 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) |
232 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, | 244 : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, |
233 length)) { | 245 length)) { |
234 } | 246 } |
235 | 247 |
236 } // namespace unibrow | 248 } // namespace unibrow |
237 | 249 |
238 #endif // V8_UNICODE_INL_H_ | 250 #endif // V8_UNICODE_INL_H_ |
OLD | NEW |