OLD | NEW |
---|---|
1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
100 static const int kMask = ~(1 << 6); | 100 static const int kMask = ~(1 << 6); |
101 if (c <= kMaxOneByteChar) { | 101 if (c <= kMaxOneByteChar) { |
102 str[0] = c; | 102 str[0] = c; |
103 return 1; | 103 return 1; |
104 } | 104 } |
105 str[0] = 0xC0 | (c >> 6); | 105 str[0] = 0xC0 | (c >> 6); |
106 str[1] = 0x80 | (c & kMask); | 106 str[1] = 0x80 | (c & kMask); |
107 return 2; | 107 return 2; |
108 } | 108 } |
109 | 109 |
110 // Encode encodes the Unicode code point c into the given str buffer. Unless | |
111 // allow_invalid is set to true, surrogate code points will be replaced with | |
112 // kReplacementCharacter. The caller is required to combine surrogate pairs | |
113 // into code points before calling Encode. | |
114 unsigned Utf8::Encode(char* str, uchar c, bool allow_invalid) { | |
dcarney
2014/01/04 15:56:45
the name of the third argument is not the same as
haimuiba
2014/01/06 05:40:18
Thx, will fix.
| |
115 if (!allow_invalid && | |
116 Utf16::IsLeadSurrogate(c) || | |
117 Utf16::IsTrailSurrogate(c)) { | |
118 c = kReplacementCharacter; | |
119 } | |
110 | 120 |
111 unsigned Utf8::Encode(char* str, uchar c, int previous) { | |
112 static const int kMask = ~(1 << 6); | 121 static const int kMask = ~(1 << 6); |
113 if (c <= kMaxOneByteChar) { | 122 if (c <= kMaxOneByteChar) { |
114 str[0] = c; | 123 str[0] = c; |
115 return 1; | 124 return 1; |
116 } else if (c <= kMaxTwoByteChar) { | 125 } else if (c <= kMaxTwoByteChar) { |
117 str[0] = 0xC0 | (c >> 6); | 126 str[0] = 0xC0 | (c >> 6); |
118 str[1] = 0x80 | (c & kMask); | 127 str[1] = 0x80 | (c & kMask); |
119 return 2; | 128 return 2; |
120 } else if (c <= kMaxThreeByteChar) { | 129 } else if (c <= kMaxThreeByteChar) { |
dcarney
2014/01/04 15:56:45
can't chop this section out - need backwards compatibility
haimuiba
2014/01/06 05:40:18
I'd prefer changing the other call sites if needed
| |
121 if (Utf16::IsTrailSurrogate(c) && | |
122 Utf16::IsLeadSurrogate(previous)) { | |
123 const int kUnmatchedSize = kSizeOfUnmatchedSurrogate; | |
124 return Encode(str - kUnmatchedSize, | |
125 Utf16::CombineSurrogatePair(previous, c), | |
126 Utf16::kNoPreviousCharacter) - kUnmatchedSize; | |
127 } | |
128 str[0] = 0xE0 | (c >> 12); | 130 str[0] = 0xE0 | (c >> 12); |
129 str[1] = 0x80 | ((c >> 6) & kMask); | 131 str[1] = 0x80 | ((c >> 6) & kMask); |
130 str[2] = 0x80 | (c & kMask); | 132 str[2] = 0x80 | (c & kMask); |
131 return 3; | 133 return 3; |
132 } else { | 134 } else { |
133 str[0] = 0xF0 | (c >> 18); | 135 str[0] = 0xF0 | (c >> 18); |
134 str[1] = 0x80 | ((c >> 12) & kMask); | 136 str[1] = 0x80 | ((c >> 12) & kMask); |
135 str[2] = 0x80 | ((c >> 6) & kMask); | 137 str[2] = 0x80 | ((c >> 6) & kMask); |
136 str[3] = 0x80 | (c & kMask); | 138 str[3] = 0x80 | (c & kMask); |
137 return 4; | 139 return 4; |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
209 // Copy the rest the slow way. | 211 // Copy the rest the slow way. |
210 WriteUtf16Slow(unbuffered_start_, | 212 WriteUtf16Slow(unbuffered_start_, |
211 data + buffer_length, | 213 data + buffer_length, |
212 length - buffer_length); | 214 length - buffer_length); |
213 return length; | 215 return length; |
214 } | 216 } |
215 | 217 |
216 } // namespace unibrow | 218 } // namespace unibrow |
217 | 219 |
218 #endif // V8_UNICODE_INL_H_ | 220 #endif // V8_UNICODE_INL_H_ |
OLD | NEW |