Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 89 matching lines...) | |
| 100 static const int kMask = ~(1 << 6); | 100 static const int kMask = ~(1 << 6); |
| 101 if (c <= kMaxOneByteChar) { | 101 if (c <= kMaxOneByteChar) { |
| 102 str[0] = c; | 102 str[0] = c; |
| 103 return 1; | 103 return 1; |
| 104 } | 104 } |
| 105 str[0] = 0xC0 | (c >> 6); | 105 str[0] = 0xC0 | (c >> 6); |
| 106 str[1] = 0x80 | (c & kMask); | 106 str[1] = 0x80 | (c & kMask); |
| 107 return 2; | 107 return 2; |
| 108 } | 108 } |
| 109 | 109 |
| 110 // Encode encodes the unicode code point c into the given str buffer. Unless | |
| 111 // allow_invalid is set to true, surrogate code points will be replaced with | |
| 112 // kReplacementCharacter. The caller is required to combine surrogate pairs | |
| 113 // into code points before calling Encode. | |
| 114 unsigned Utf8::Encode(char* str, uchar c, bool allow_invalid) { | |
| 115 if (!allow_invalid && | |
|
dcarney
2014/01/07 10:12:16
move this block down into the kMaxThreeByteChar cl… [comment truncated in this extract]
| |
| 116 (Utf16::IsLeadSurrogate(c) || | |
| 117 Utf16::IsTrailSurrogate(c))) { | |
| 118 c = kReplacementCharacter; | |
| 119 } | |
| 110 | 120 |
| 111 unsigned Utf8::Encode(char* str, uchar c, int previous) { | |
| 112 static const int kMask = ~(1 << 6); | 121 static const int kMask = ~(1 << 6); |
| 113 if (c <= kMaxOneByteChar) { | 122 if (c <= kMaxOneByteChar) { |
| 114 str[0] = c; | 123 str[0] = c; |
| 115 return 1; | 124 return 1; |
| 116 } else if (c <= kMaxTwoByteChar) { | 125 } else if (c <= kMaxTwoByteChar) { |
| 117 str[0] = 0xC0 | (c >> 6); | 126 str[0] = 0xC0 | (c >> 6); |
| 118 str[1] = 0x80 | (c & kMask); | 127 str[1] = 0x80 | (c & kMask); |
| 119 return 2; | 128 return 2; |
| 120 } else if (c <= kMaxThreeByteChar) { | 129 } else if (c <= kMaxThreeByteChar) { |
| 121 if (Utf16::IsTrailSurrogate(c) && | |
| 122 Utf16::IsLeadSurrogate(previous)) { | |
| 123 const int kUnmatchedSize = kSizeOfUnmatchedSurrogate; | |
| 124 return Encode(str - kUnmatchedSize, | |
| 125 Utf16::CombineSurrogatePair(previous, c), | |
| 126 Utf16::kNoPreviousCharacter) - kUnmatchedSize; | |
| 127 } | |
| 128 str[0] = 0xE0 | (c >> 12); | 130 str[0] = 0xE0 | (c >> 12); |
| 129 str[1] = 0x80 | ((c >> 6) & kMask); | 131 str[1] = 0x80 | ((c >> 6) & kMask); |
| 130 str[2] = 0x80 | (c & kMask); | 132 str[2] = 0x80 | (c & kMask); |
| 131 return 3; | 133 return 3; |
| 132 } else { | 134 } else { |
| 133 str[0] = 0xF0 | (c >> 18); | 135 str[0] = 0xF0 | (c >> 18); |
| 134 str[1] = 0x80 | ((c >> 12) & kMask); | 136 str[1] = 0x80 | ((c >> 12) & kMask); |
| 135 str[2] = 0x80 | ((c >> 6) & kMask); | 137 str[2] = 0x80 | ((c >> 6) & kMask); |
| 136 str[3] = 0x80 | (c & kMask); | 138 str[3] = 0x80 | (c & kMask); |
| 137 return 4; | 139 return 4; |
| 138 } | 140 } |
| 139 } | 141 } |
| 140 | 142 |
| 141 | 143 |
| 142 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { | 144 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { |
| 143 if (length <= 0) return kBadChar; | 145 if (length <= 0) return kBadChar; |
| 144 byte first = bytes[0]; | 146 byte first = bytes[0]; |
| 145 // Characters between 0000 and 0007F are encoded as a single character | 147 // Characters between 0000 and 0007F are encoded as a single character |
| 146 if (first <= kMaxOneByteChar) { | 148 if (first <= kMaxOneByteChar) { |
| 147 *cursor += 1; | 149 *cursor += 1; |
| 148 return first; | 150 return first; |
| 149 } | 151 } |
| 150 return CalculateValue(bytes, length, cursor); | 152 return CalculateValue(bytes, length, cursor); |
| 151 } | 153 } |
| 152 | 154 |
| 155 // @TODO give this the same semantics as Encode? | |
|
dcarney
2014/01/07 11:05:50
I don't see an easy way to do this. You'd have to… [comment truncated in this extract]
| |
| 153 unsigned Utf8::Length(uchar c, int previous) { | 156 unsigned Utf8::Length(uchar c, int previous) { |
| 154 if (c <= kMaxOneByteChar) { | 157 if (c <= kMaxOneByteChar) { |
| 155 return 1; | 158 return 1; |
| 156 } else if (c <= kMaxTwoByteChar) { | 159 } else if (c <= kMaxTwoByteChar) { |
| 157 return 2; | 160 return 2; |
| 158 } else if (c <= kMaxThreeByteChar) { | 161 } else if (c <= kMaxThreeByteChar) { |
| 159 if (Utf16::IsTrailSurrogate(c) && | 162 if (Utf16::IsTrailSurrogate(c) && |
| 160 Utf16::IsLeadSurrogate(previous)) { | 163 Utf16::IsLeadSurrogate(previous)) { |
| 161 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; | 164 return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; |
| 162 } | 165 } |
| (...skipping 46 matching lines...) | |
| 209 // Copy the rest the slow way. | 212 // Copy the rest the slow way. |
| 210 WriteUtf16Slow(unbuffered_start_, | 213 WriteUtf16Slow(unbuffered_start_, |
| 211 data + buffer_length, | 214 data + buffer_length, |
| 212 length - buffer_length); | 215 length - buffer_length); |
| 213 return length; | 216 return length; |
| 214 } | 217 } |
| 215 | 218 |
| 216 } // namespace unibrow | 219 } // namespace unibrow |
| 217 | 220 |
| 218 #endif // V8_UNICODE_INL_H_ | 221 #endif // V8_UNICODE_INL_H_ |
| OLD | NEW |