OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
154 return true; | 154 return true; |
155 } | 155 } |
156 if (err != U_BUFFER_OVERFLOW_ERROR) | 156 if (err != U_BUFFER_OVERFLOW_ERROR) |
157 return false; // Unknown error, give up. | 157 return false; // Unknown error, give up. |
158 | 158 |
159 // Not enough room in our buffer, expand. | 159 // Not enough room in our buffer, expand. |
160 output->Resize(output->capacity() * 2); | 160 output->Resize(output->capacity() * 2); |
161 } | 161 } |
162 } | 162 } |
163 | 163 |
| 164 bool ReadUTFChar(const char* str, int* begin, int length, |
| 165 unsigned* code_point_out) { |
| 166 int code_point; // Avoids warning when U8_NEXT writes -1 to it. |
| 167 U8_NEXT(str, *begin, length, code_point); |
| 168 *code_point_out = static_cast<unsigned>(code_point); |
| 169 |
| 170 // The ICU macro above moves to the next char, we want to point to the last |
| 171 // char consumed. |
| 172 (*begin)--; |
| 173 |
| 174 // Validate the decoded value. |
| 175 if (U_IS_UNICODE_CHAR(code_point)) |
| 176 return true; |
| 177 *code_point_out = kUnicodeReplacementCharacter; |
| 178 return false; |
| 179 } |
| 180 |
| 181 bool ReadUTFChar(const char16* str, int* begin, int length, |
| 182 unsigned* code_point) { |
| 183 if (U16_IS_SURROGATE(str[*begin])) { |
| 184 if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || |
| 185 !U16_IS_TRAIL(str[*begin + 1])) { |
| 186 // Invalid surrogate pair. |
| 187 *code_point = kUnicodeReplacementCharacter; |
| 188 return false; |
| 189 } else { |
| 190 // Valid surrogate pair. |
| 191 *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]); |
| 192 (*begin)++; |
| 193 } |
| 194 } else { |
| 195 // Not a surrogate, just one 16-bit word. |
| 196 *code_point = str[*begin]; |
| 197 } |
| 198 |
| 199 if (U_IS_UNICODE_CHAR(*code_point)) |
| 200 return true; |
| 201 |
| 202 // Invalid code point. |
| 203 *code_point = kUnicodeReplacementCharacter; |
| 204 return false; |
| 205 } |
| 206 |
164 } // namespace url_canon | 207 } // namespace url_canon |
OLD | NEW |