| OLD | NEW |
| (Empty) |
| 1 // Copyright 2003-2009 Google Inc. | |
| 2 // | |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 4 // you may not use this file except in compliance with the License. | |
| 5 // You may obtain a copy of the License at | |
| 6 // | |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 8 // | |
| 9 // Unless required by applicable law or agreed to in writing, software | |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 12 // See the License for the specific language governing permissions and | |
| 13 // limitations under the License. | |
| 14 // ======================================================================== | |
| 15 | |
| 16 #include "omaha/base/string.h" | |
| 17 | |
| 18 #include <wininet.h> // For INTERNET_MAX_URL_LENGTH. | |
| 19 #include <algorithm> | |
| 20 #include <cstdlib> | |
| 21 #include "base/scoped_ptr.h" | |
| 22 #include "omaha/base/commontypes.h" | |
| 23 #include "omaha/base/debug.h" | |
| 24 #include "omaha/base/localization.h" | |
| 25 #include "omaha/base/logging.h" | |
| 26 | |
| 27 namespace omaha { | |
| 28 | |
| 29 namespace { | |
| 30 // Testing shows that only the following ASCII characters are | |
| 31 // considered spaces by GetStringTypeA: 9-13, 32, 160. | |
| 32 // Rather than call GetStringTypeA with no locale, as we used to, | |
| 33 // we look up the values directly in a precomputed array. | |
| 34 | |
| 35 SELECTANY byte spaces[256] = { | |
| 36 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0-9 | |
| 37 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 10-19 | |
| 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29 | |
| 39 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 30-39 | |
| 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49 | |
| 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59 | |
| 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-69 | |
| 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79 | |
| 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89 | |
| 45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99 | |
| 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109 | |
| 47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119 | |
| 48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129 | |
| 49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139 | |
| 50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149 | |
| 51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 150-159 | |
| 52 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160-169 | |
| 53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 170-179 | |
| 54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 180-189 | |
| 55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 190-199 | |
| 56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 200-209 | |
| 57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 210-219 | |
| 58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 220-229 | |
| 59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 230-239 | |
| 60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 240-249 | |
| 61 0, 0, 0, 0, 0, 1, // 250-255 | |
| 62 }; | |
| 63 } // namespace | |
| 64 | |
| 65 const TCHAR* const kFalse = _T("false"); | |
| 66 const TCHAR* const kTrue = _T("true"); | |
| 67 | |
| 68 bool IsSpaceW(WCHAR c) { | |
| 69 // GetStringTypeW considers these characters to be spaces: | |
| 70 // 9-13, 32, 133, 160, 5760, 8192-8203, 8232, 8233, 12288 | |
| 71 if (c < 256) | |
| 72 return (c == 133 || IsSpaceA((char) (c & 0xff))); | |
| 73 | |
| 74 return (c >= 8192 && c <= 8203) || c == 8232 || | |
| 75 c == 8233 || c == 12288; | |
| 76 } | |
| 77 | |
| 78 bool IsSpaceA(char c) { | |
| 79 return spaces[static_cast<unsigned char>(c)] == 1; | |
| 80 } | |
| 81 | |
| 82 int TrimCString(CString &s) { | |
| 83 int len = Trim(s.GetBuffer()); | |
| 84 s.ReleaseBufferSetLength(len); | |
| 85 return len; | |
| 86 } | |
| 87 | |
| 88 void MakeLowerCString(CString & s) { | |
| 89 int len = s.GetLength(); | |
| 90 String_FastToLower(s.GetBuffer()); | |
| 91 s.ReleaseBufferSetLength(len); | |
| 92 } | |
| 93 | |
| 94 int Trim(TCHAR *s) { | |
| 95 ASSERT(s, (L"")); | |
| 96 | |
| 97 // First find end of leading spaces | |
| 98 TCHAR *start = s; | |
| 99 while (*start) { | |
| 100 if (!IsSpace(*start)) | |
| 101 break; | |
| 102 ++start; | |
| 103 } | |
| 104 | |
| 105 // Now search for the end, remembering the start of the last spaces | |
| 106 TCHAR *end = start; | |
| 107 TCHAR *last_space = end; | |
| 108 while (*end) { | |
| 109 if (!IsSpace(*end)) | |
| 110 last_space = end + 1; | |
| 111 ++end; | |
| 112 } | |
| 113 | |
| 114 // Copy the part we want | |
| 115 int len = last_space - start; | |
| 116 // lint -e{802} Conceivably passing a NULL pointer | |
| 117 memmove(s, start, len * sizeof(TCHAR)); | |
| 118 | |
| 119 // 0 terminate | |
| 120 s[len] = 0; | |
| 121 | |
| 122 return len; | |
| 123 } | |
| 124 | |
| 125 void TrimString(CString& s, const TCHAR* delimiters) { | |
| 126 s = s.Trim(delimiters); | |
| 127 } | |
| 128 | |
| 129 // Strip the first token from the front of argument s. A token is a | |
| 130 // series of consecutive non-blank characters - unless the first | |
| 131 // character is a double-quote ("), in that case the token is the full | |
| 132 // quoted string | |
| 133 CString StripFirstQuotedToken(const CString& s) { | |
| 134 const int npos = -1; | |
| 135 | |
| 136 // Make a writeable copy | |
| 137 CString str(s); | |
| 138 | |
| 139 // Trim any surrounding blanks (and tabs, for the heck of it) | |
| 140 TrimString(str, L" \t"); | |
| 141 | |
| 142 // Too short to have a second token | |
| 143 if (str.GetLength() <= 1) | |
| 144 return L""; | |
| 145 | |
| 146 // What kind of token are we stripping? | |
| 147 if (str[0] == L'\"') { | |
| 148 // Remove leading quoting string | |
| 149 int i = str.Find(L"\"", 1); | |
| 150 if (i != npos) | |
| 151 i++; | |
| 152 return str.Mid(i); | |
| 153 } else { | |
| 154 // Remove leading token | |
| 155 int i = str.FindOneOf(L" \t"); | |
| 156 if (i != npos) | |
| 157 i++; | |
| 158 return str.Mid(i); | |
| 159 } | |
| 160 } | |
| 161 | |
| 162 // A block of text to separate lines, and back | |
| 163 void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CStrin
g>* lines) { | |
| 164 ASSERT(delimiter, (L"")); | |
| 165 ASSERT(lines, (L"")); | |
| 166 | |
| 167 size_t delimiter_len = ::lstrlen(delimiter); | |
| 168 int b = 0; | |
| 169 int e = 0; | |
| 170 | |
| 171 for (b = 0; e != -1 && b < text.GetLength(); b = e + delimiter_len) { | |
| 172 e = text.Find(delimiter, b); | |
| 173 if (e != -1) { | |
| 174 ASSERT1(e - b > 0); | |
| 175 lines->push_back(text.Mid(b, e - b)); | |
| 176 } else { | |
| 177 lines->push_back(text.Mid(b)); | |
| 178 } | |
| 179 } | |
| 180 } | |
| 181 | |
| 182 void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CStr
ing* text) { | |
| 183 ASSERT(delimiter, (L"")); | |
| 184 ASSERT(text, (L"")); | |
| 185 | |
| 186 size_t delimiter_len = ::lstrlen(delimiter); | |
| 187 size_t len = 0; | |
| 188 for (size_t i = 0; i < lines.size(); ++i) { | |
| 189 len += lines[i].GetLength() + delimiter_len; | |
| 190 } | |
| 191 text->Empty(); | |
| 192 text->Preallocate(len); | |
| 193 for (std::vector<CString>::size_type i = 0; i < lines.size(); ++i) { | |
| 194 text->Append(lines[i]); | |
| 195 if (delimiter_len) { | |
| 196 text->Append(delimiter); | |
| 197 } | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 int CleanupWhitespaceCString(CString &s) { | |
| 202 int len = CleanupWhitespace(s.GetBuffer()); | |
| 203 s.ReleaseBufferSetLength(len); | |
| 204 return len; | |
| 205 } | |
| 206 | |
| 207 int CleanupWhitespace(TCHAR *str) { | |
| 208 ASSERT(str, (L"")); | |
| 209 | |
| 210 TCHAR *src = str; | |
| 211 TCHAR *dest = str; | |
| 212 int spaces = 0; | |
| 213 bool at_start = true; | |
| 214 while (true) { | |
| 215 // At end of string? | |
| 216 TCHAR c = *src; | |
| 217 if (0 == c) | |
| 218 break; | |
| 219 | |
| 220 // Look for whitespace; copy it over if not whitespace | |
| 221 if (IsSpace(c)) { | |
| 222 ++spaces; | |
| 223 } | |
| 224 else { | |
| 225 *dest++ = c; | |
| 226 at_start = false; | |
| 227 spaces = 0; | |
| 228 } | |
| 229 | |
| 230 // Write only first consecutive space (but skip space at start) | |
| 231 if (1 == spaces && !at_start) | |
| 232 *dest++ = ' '; | |
| 233 | |
| 234 ++src; | |
| 235 } | |
| 236 | |
| 237 // Remove trailing space, if any | |
| 238 if (dest > str && *(dest - 1) == L' ') | |
| 239 --dest; | |
| 240 | |
| 241 // 0-terminate | |
| 242 *dest = 0; | |
| 243 | |
| 244 return dest - str; | |
| 245 } | |
| 246 | |
| 247 // Take 1 single hexadecimal "digit" (as a character) and return its decimal val
ue | |
| 248 // Returns -1 if given invalid hex digit | |
| 249 int HexDigitToDec(const TCHAR digit) { | |
| 250 if (digit >= L'A' && digit <= L'F') | |
| 251 return 10 + (digit - L'A'); | |
| 252 else if (digit >= L'a' && digit <= L'f') | |
| 253 return 10 + (digit - L'a'); | |
| 254 else if (digit >= L'0' && digit <= L'9') | |
| 255 return (digit - L'0'); | |
| 256 else | |
| 257 return -1; | |
| 258 } | |
| 259 | |
| 260 // Convert the 2 hex chars at positions <pos> and <pos>+1 in <s> to a char (<cha
r_out>) | |
| 261 // Note: scanf was giving me troubles, so here's the manual version | |
| 262 // Extracted char gets written to <char_out>, which must be allocated by | |
| 263 // the caller; return true on success or false if parameters are incorrect | |
| 264 // or string does not have 2 hex digits at the specified position | |
| 265 // NOTE: <char_out> is NOT a string, just a pointer to a char for the result | |
| 266 bool ExtractChar(const CString & s, int pos, unsigned char * char_out) { | |
| 267 // char_out may be NULL | |
| 268 | |
| 269 if (s.GetLength() < pos + 1) { | |
| 270 return false; | |
| 271 } | |
| 272 | |
| 273 if (pos < 0 || NULL == char_out) { | |
| 274 ASSERT(0, (_T("invalid params: pos<0 or char_out is NULL"))); | |
| 275 return false; | |
| 276 } | |
| 277 | |
| 278 TCHAR c1 = s.GetAt(pos); | |
| 279 TCHAR c2 = s.GetAt(pos+1); | |
| 280 | |
| 281 int p1 = HexDigitToDec(c1); | |
| 282 int p2 = HexDigitToDec(c2); | |
| 283 | |
| 284 if (p1 == -1 || p2 == -1) { | |
| 285 return false; | |
| 286 } | |
| 287 | |
| 288 *char_out = (unsigned char)(p1 * 16 + p2); | |
| 289 return true; | |
| 290 } | |
| 291 | |
| 292 WCHAR *ToWide (const char *s, int len) { | |
| 293 ASSERT (s, (L"")); | |
| 294 WCHAR *w = new WCHAR [len+1]; if (!w) { return NULL; } | |
| 295 // int rc = MultiByteToWideChar (CP_ACP, 0, s.GetString(), (int)s.GetLength(
)+1, w, s.GetLength()+1); | |
| 296 // TODO(omaha): why would it ever be the case that rc > len? | |
| 297 int rc = MultiByteToWideChar (CP_ACP, 0, s, len, w, len); | |
| 298 if (rc > len) { delete [] w; return NULL; } | |
| 299 // ASSERT (rc <= len, (L"")); | |
| 300 w[rc]=L'\0'; | |
| 301 return w; | |
| 302 } | |
| 303 | |
| 304 const byte *BufferContains (const byte *buf, uint32 buf_len, const byte *data, u
int32 data_len) { | |
| 305 ASSERT(data, (L"")); | |
| 306 ASSERT(buf, (L"")); | |
| 307 | |
| 308 for (uint32 i = 0; i < buf_len; i++) { | |
| 309 uint32 j = i; | |
| 310 uint32 k = 0; | |
| 311 uint32 len = 0; | |
| 312 while (j < buf_len && k < data_len && buf[j++] == data[k++]) { len++; } | |
| 313 if (len == data_len) { return buf + i; } | |
| 314 } | |
| 315 return 0; | |
| 316 } | |
| 317 | |
| 318 // Converting the Ansi Multibyte String into unicode string. The multibyte | |
| 319 // string is encoded using the specified codepage. | |
| 320 // The code is pretty much like the U2W function, except the codepage can be | |
| 321 // any valid windows CP. | |
| 322 BOOL AnsiToWideString(const char *from, int length, UINT codepage, CString *to)
{ | |
| 323 ASSERT(from, (L"")); | |
| 324 ASSERT(to, (L"")); | |
| 325 ASSERT1(length >= -1); | |
| 326 // Figure out how long the string is | |
| 327 int req_chars = MultiByteToWideChar(codepage, 0, from, length, NULL, 0); | |
| 328 | |
| 329 if (req_chars <= 0) { | |
| 330 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed "))); | |
| 331 *to = AnsiToWideString(from, length); | |
| 332 return FALSE; | |
| 333 } | |
| 334 | |
| 335 TCHAR *buffer = to->GetBufferSetLength(req_chars); | |
| 336 int conv_chars = MultiByteToWideChar(codepage, 0, from, length, buffer, req_ch
ars); | |
| 337 if (conv_chars == 0) { | |
| 338 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed "))); | |
| 339 to->ReleaseBuffer(0); | |
| 340 *to = AnsiToWideString(from, length); | |
| 341 return FALSE; | |
| 342 } | |
| 343 | |
| 344 // Something truly horrible happened. | |
| 345 ASSERT (req_chars == conv_chars, (L"MBToWide returned unexpected value: GetLas
tError()=%d",GetLastError())); | |
| 346 // If length was inferred, conv_chars includes the null terminator. | |
| 347 // Adjust the length here to remove null termination, | |
| 348 // because we use the length-qualified CString constructor, | |
| 349 // which automatically adds null termination given an unterminated array. | |
| 350 if (-1 == length) { --conv_chars; } | |
| 351 to->ReleaseBuffer(conv_chars); | |
| 352 return TRUE; | |
| 353 } | |
| 354 | |
| 355 // CStringW(const char* from) did not cast all character properly | |
| 356 // so we write our own. | |
| 357 CString AnsiToWideString(const char *from, int length) { | |
| 358 ASSERT(from, (L"")); | |
| 359 ASSERT1(length >= -1); | |
| 360 if (length < 0) | |
| 361 length = strlen(from); | |
| 362 CString to; | |
| 363 TCHAR *buffer = to.GetBufferSetLength(length); | |
| 364 for (int i = 0; i < length; ++i) | |
| 365 buffer[i] = static_cast<UINT8>(from[i]); | |
| 366 to.ReleaseBuffer(length); | |
| 367 return to; | |
| 368 } | |
| 369 | |
| 370 | |
| 371 // Transform a unicode string into UTF8, as represented in an ASCII string | |
| 372 CStringA WideToUtf8(const CString& w) { | |
| 373 // Add a cutoff. If it's all ascii, convert it directly | |
| 374 const TCHAR* input = static_cast<const TCHAR*>(w.GetString()); | |
| 375 int input_len = w.GetLength(), i; | |
| 376 for (i = 0; i < input_len; ++i) { | |
| 377 if (input[i] > 127) { | |
| 378 break; | |
| 379 } | |
| 380 } | |
| 381 | |
| 382 // If we made it to the end without breaking, then it's all ANSI, so do a quic
k convert | |
| 383 if (i == input_len) { | |
| 384 return WideToAnsiDirect(w); | |
| 385 } | |
| 386 | |
| 387 // Figure out how long the string is | |
| 388 int req_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL); | |
| 389 | |
| 390 scoped_array<char> utf8_buffer(new char[req_bytes]); | |
| 391 | |
| 392 int conv_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8_buffer.get(), r
eq_bytes, NULL, NULL); | |
| 393 ASSERT1(req_bytes == conv_bytes); | |
| 394 | |
| 395 // conv_bytes includes the null terminator, when we read this in, don't read t
he terminator | |
| 396 CStringA out(utf8_buffer.get(), conv_bytes - 1); | |
| 397 | |
| 398 return out; | |
| 399 } | |
| 400 | |
| 401 CString Utf8ToWideChar(const char* utf8, uint32 num_bytes) { | |
| 402 ASSERT1(utf8); | |
| 403 if (num_bytes == 0) { | |
| 404 return CString(); | |
| 405 } | |
| 406 | |
| 407 uint32 number_of_wide_chars = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_byte
s, NULL, 0); | |
| 408 number_of_wide_chars += 1; // make room for NULL terminator | |
| 409 | |
| 410 CString ret_string; | |
| 411 TCHAR* buffer = ret_string.GetBuffer(number_of_wide_chars); | |
| 412 DWORD number_of_characters_copied = ::MultiByteToWideChar(CP_UTF8, 0, utf8, nu
m_bytes, buffer, number_of_wide_chars); | |
| 413 ASSERT1(number_of_characters_copied == number_of_wide_chars - 1); | |
| 414 buffer[number_of_wide_chars - 1] = _T('\0'); // ensure there is a NULL termin
ator | |
| 415 ret_string.ReleaseBuffer(); | |
| 416 | |
| 417 // Strip the byte order marker if there is one in the document. | |
| 418 if (ret_string[0] == kUnicodeBom) { | |
| 419 ret_string = ret_string.Right(ret_string.GetLength() - 1); | |
| 420 } | |
| 421 | |
| 422 if (number_of_characters_copied > 0) { | |
| 423 return ret_string; | |
| 424 } | |
| 425 | |
| 426 // Failure case | |
| 427 return CString(); | |
| 428 } | |
| 429 | |
| 430 CString Utf8BufferToWideChar(const std::vector<uint8>& buffer) { | |
| 431 CString result; | |
| 432 if (!buffer.empty()) { | |
| 433 result = Utf8ToWideChar( | |
| 434 reinterpret_cast<const char*>(&buffer.front()), buffer.size()); | |
| 435 } | |
| 436 return result; | |
| 437 } | |
| 438 | |
| 439 CString AbbreviateString (const CString & title, int32 max_len) { | |
| 440 ASSERT (max_len, (L"")); | |
| 441 CString s(title); | |
| 442 TrimCString(s); // remove whitespace at start/end | |
| 443 if (s.GetLength() > max_len) { | |
| 444 s = s.Left (max_len - 2); | |
| 445 CString orig(s); | |
| 446 // remove partial words | |
| 447 while (s.GetLength() > 1 && !IsSpace(s[s.GetLength()-1])) { s = s.Left (
s.GetLength() - 1); } | |
| 448 // but not if it would make the string very short | |
| 449 if (s.GetLength() < max_len / 2) { s = orig; } | |
| 450 s += _T(".."); | |
| 451 } | |
| 452 | |
| 453 return s; | |
| 454 } | |
| 455 | |
| 456 CString GetAbsoluteUri(const CString& uri) { | |
| 457 int i = String_FindString(uri, _T("://")); | |
| 458 if (i==-1) return uri; | |
| 459 | |
| 460 // add trailing / if none exists | |
| 461 int j = String_FindChar(uri, L'/',i+3); | |
| 462 if (j==-1) return (uri+NOTRANSL(_T("/"))); | |
| 463 | |
| 464 // remove duplicate trailing slashes | |
| 465 int len = uri.GetLength(); | |
| 466 if (len > 1 && uri.GetAt(len-1) == '/' && uri.GetAt(len-2) == '/') { | |
| 467 CString new_uri(uri); | |
| 468 int new_len = new_uri.GetLength(); | |
| 469 while (new_len > 1 && new_uri.GetAt(new_len-1) == '/' && new_uri.GetAt(new_l
en-2) == '/') { | |
| 470 new_len--; | |
| 471 new_uri = new_uri.Left(new_len); | |
| 472 } | |
| 473 return new_uri; | |
| 474 } | |
| 475 else return uri; | |
| 476 } | |
| 477 | |
| 478 // requires that input have a PROTOCOL (http://) for proper behavior | |
| 479 // items with the "file" protocol are returned as is (what is the hostname in th
at case? C: ? doesn't make sense) | |
| 480 // TODO(omaha): loosen requirement | |
| 481 // includes http://, e.g. http://www.google.com/ | |
| 482 CString GetUriHostName(const CString& uri, bool strip_leading) { | |
| 483 if (String_StartsWith(uri,NOTRANSL(_T("file:")),true)) return uri; | |
| 484 | |
| 485 // correct any "errors" | |
| 486 CString s(GetAbsoluteUri(uri)); | |
| 487 | |
| 488 // Strip the leading "www." | |
| 489 if (strip_leading) | |
| 490 { | |
| 491 int index_www = String_FindString(s, kStrLeadingWww); | |
| 492 if (index_www != -1) | |
| 493 ReplaceCString (s, kStrLeadingWww, _T("")); | |
| 494 } | |
| 495 | |
| 496 int i = String_FindString(s, _T("://")); | |
| 497 if(i==-1) return uri; | |
| 498 int j = String_FindChar(s, L'/',i+3); | |
| 499 if(j==-1) return uri; | |
| 500 return s.Left(j+1); | |
| 501 } | |
| 502 | |
| 503 // requires that input have a PROTOCOL (http://) for proper behavior | |
| 504 // TODO(omaha): loosen requirement | |
| 505 // removes the http:// and the extra slash '/' at the end. | |
| 506 // http://www.google.com/ -> www.google.com (or google.com if strip_leading = tr
ue) | |
| 507 CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading) { | |
| 508 CString s(GetUriHostName(uri,strip_leading)); | |
| 509 | |
| 510 // remove protocol | |
| 511 int i = String_FindString (s, _T("://")); | |
| 512 if(i==-1) return s; | |
| 513 CString ss(s.Right (s.GetLength() - i-3)); | |
| 514 | |
| 515 // remove the last '/' | |
| 516 int j = ss.ReverseFind('/'); | |
| 517 if (j == -1) return ss; | |
| 518 return ss.Left(j); | |
| 519 } | |
| 520 | |
| 521 CString AbbreviateUri(const CString& uri, int32 max_len) { | |
| 522 ASSERT1(max_len); | |
| 523 ASSERT1(!uri.IsEmpty()); | |
| 524 | |
| 525 CString s(uri); | |
| 526 VERIFY1(String_FindString (s, _T("://"))); | |
| 527 | |
| 528 TrimCString(s); | |
| 529 // SKIP_LOC_BEGIN | |
| 530 RemoveFromStart (s, _T("ftp://"), false); | |
| 531 RemoveFromStart (s, _T("http://"), false); | |
| 532 RemoveFromStart (s, _T("https://"), false); | |
| 533 RemoveFromStart (s, _T("www."), false); | |
| 534 RemoveFromStart (s, _T("ftp."), false); | |
| 535 RemoveFromStart (s, _T("www-"), false); | |
| 536 RemoveFromStart (s, _T("ftp-"), false); | |
| 537 RemoveFromEnd (s, _T(".htm")); | |
| 538 RemoveFromEnd (s, _T(".html")); | |
| 539 RemoveFromEnd (s, _T(".asp")); | |
| 540 // SKIP_LOC_END | |
| 541 if (s.GetLength() > max_len) { | |
| 542 // try to keep the portion after the last / | |
| 543 int32 last_slash = s.ReverseFind ((TCHAR)'/'); | |
| 544 CString after_last_slash; | |
| 545 if (last_slash == -1) { after_last_slash = _T(""); } | |
| 546 else { after_last_slash = s.Right (uri.GetLength() - last_slash - 1); } | |
| 547 if (after_last_slash.GetLength() > max_len / 2) { | |
| 548 after_last_slash = after_last_slash.Right (max_len / 2); | |
| 549 } | |
| 550 s = s.Left (max_len - after_last_slash.GetLength() - 2); | |
| 551 s += ".."; | |
| 552 s += after_last_slash; | |
| 553 } | |
| 554 return s; | |
| 555 } | |
| 556 | |
| 557 // normalized version of a URI intended to map duplicates to the same string | |
| 558 // the normalized URI is not a valid URI | |
| 559 CString NormalizeUri (const CString & uri) { | |
| 560 CString s(uri); | |
| 561 TrimCString(s); | |
| 562 MakeLowerCString(s); | |
| 563 // SKIP_LOC_BEGIN | |
| 564 ReplaceCString (s, _T(":80"), _T("")); | |
| 565 | |
| 566 RemoveFromEnd (s, _T("/index.html")); | |
| 567 RemoveFromEnd (s, _T("/welcome.html")); // old netscape standard | |
| 568 RemoveFromEnd (s, _T("/")); | |
| 569 | |
| 570 RemoveFromStart (s, _T("ftp://"), false); | |
| 571 RemoveFromStart (s, _T("http://"), false); | |
| 572 RemoveFromStart (s, _T("https://"), false); | |
| 573 RemoveFromStart (s, _T("www."), false); | |
| 574 RemoveFromStart (s, _T("ftp."), false); | |
| 575 RemoveFromStart (s, _T("www-"), false); | |
| 576 RemoveFromStart (s, _T("ftp-"), false); | |
| 577 | |
| 578 ReplaceCString (s, _T("/./"), _T("/")); | |
| 579 // SKIP_LOC_END | |
| 580 | |
| 581 // TODO(omaha): | |
| 582 // fixup URLs like a/b/../../c | |
| 583 // while ($s =~ m!\/\.\.\!!) { | |
| 584 // $s =~ s!/[^/]*/\.\./!/!; | |
| 585 // } | |
| 586 | |
| 587 // TODO(omaha): | |
| 588 // unescape characters | |
| 589 // Note from RFC1630: "Sequences which start with a percent sign | |
| 590 // but are not followed by two hexadecimal characters are reserved | |
| 591 // for future extension" | |
| 592 // $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; | |
| 593 | |
| 594 return s; | |
| 595 } | |
| 596 | |
| 597 CString RemoveInternetProtocolHeader (const CString& url) { | |
| 598 int find_colon_slash_slash = String_FindString(url, NOTRANSL(L"://")); | |
| 599 if( find_colon_slash_slash != -1 ) { | |
| 600 // remove PROTOCOL:// | |
| 601 return url.Right(url.GetLength() - find_colon_slash_slash - 3); | |
| 602 } else if (String_StartsWith(url, NOTRANSL(L"mailto:"), true)) { | |
| 603 // remove "mailto:" | |
| 604 return url.Right(url.GetLength() - 7); | |
| 605 } else { | |
| 606 // return as is | |
| 607 return url; | |
| 608 } | |
| 609 } | |
| 610 | |
| 611 HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out) { | |
| 612 ASSERT1(path_out); | |
| 613 ASSERT1(uri.GetLength() < INTERNET_MAX_URL_LENGTH); | |
| 614 | |
| 615 if (uri.IsEmpty()) { | |
| 616 return E_INVALIDARG; | |
| 617 } | |
| 618 | |
| 619 DWORD buf_len = MAX_PATH; | |
| 620 HRESULT hr = ::PathCreateFromUrl(uri, | |
| 621 CStrBuf(*path_out, MAX_PATH), | |
| 622 &buf_len, | |
| 623 NULL); | |
| 624 return hr; | |
| 625 } | |
| 626 | |
| 627 void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case) { | |
| 628 ASSERT(remove, (L"")); | |
| 629 | |
| 630 // Remove the characters if it is the prefix | |
| 631 if (String_StartsWith(s, remove, ignore_case)) | |
| 632 s.Delete(0, lstrlen(remove)); | |
| 633 } | |
| 634 | |
| 635 bool String_EndsWith(const TCHAR *str, const TCHAR *end_str, bool ignore_case) { | |
| 636 ASSERT(end_str, (L"")); | |
| 637 ASSERT(str, (L"")); | |
| 638 | |
| 639 int str_len = lstrlen(str); | |
| 640 int end_len = lstrlen(end_str); | |
| 641 | |
| 642 // Definitely false if the suffix is longer than the string | |
| 643 if (end_len > str_len) | |
| 644 return false; | |
| 645 | |
| 646 const TCHAR *str_ptr = str + str_len; | |
| 647 const TCHAR *end_ptr = end_str + end_len; | |
| 648 | |
| 649 while (end_ptr >= end_str) { | |
| 650 // Check for matching characters | |
| 651 TCHAR c1 = *str_ptr; | |
| 652 TCHAR c2 = *end_ptr; | |
| 653 | |
| 654 if (ignore_case) { | |
| 655 c1 = Char_ToLower(c1); | |
| 656 c2 = Char_ToLower(c2); | |
| 657 } | |
| 658 | |
| 659 if (c1 != c2) | |
| 660 return false; | |
| 661 | |
| 662 --str_ptr; | |
| 663 --end_ptr; | |
| 664 } | |
| 665 | |
| 666 // if we haven't failed out, it must be ok! | |
| 667 return true; | |
| 668 } | |
| 669 | |
| 670 CString String_MakeEndWith(const TCHAR* str, const TCHAR* end_str, bool ignore_c
ase) { | |
| 671 if (String_EndsWith(str, end_str, ignore_case)) { | |
| 672 return str; | |
| 673 } else { | |
| 674 CString r(str); | |
| 675 r += end_str; | |
| 676 return r; | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 void RemoveFromEnd (CString & s, const TCHAR* remove) { | |
| 681 ASSERT(remove, (L"")); | |
| 682 | |
| 683 // If the suffix is shorter than the string, don't bother | |
| 684 int remove_len = lstrlen(remove); | |
| 685 if (s.GetLength() < remove_len) return; | |
| 686 | |
| 687 // If the suffix is equal | |
| 688 int suffix_begin = s.GetLength() - remove_len; | |
| 689 if (0 == lstrcmp(s.GetString() + suffix_begin, remove)) | |
| 690 s.Delete(suffix_begin, remove_len); | |
| 691 } | |
| 692 | |
| 693 CString ElideIfNeeded (const CString & input_string, int max_len, int min_len) { | |
| 694 ASSERT (min_len <= max_len, (L"")); | |
| 695 ASSERT (max_len >= TSTR_SIZE(kEllipsis)+1, (L"")); | |
| 696 ASSERT (min_len >= TSTR_SIZE(kEllipsis)+1, (L"")); | |
| 697 | |
| 698 CString s = input_string; | |
| 699 | |
| 700 s.TrimRight(); | |
| 701 if (s.GetLength() > max_len) { | |
| 702 int truncate_at = max_len - TSTR_SIZE(kEllipsis); | |
| 703 // find first space going backwards from character one after the truncation
point | |
| 704 while (truncate_at >= min_len && !IsSpace(s.GetAt(truncate_at))) | |
| 705 truncate_at--; | |
| 706 | |
| 707 // skip the space(s) | |
| 708 while (truncate_at >= min_len && IsSpace(s.GetAt(truncate_at))) | |
| 709 truncate_at--; | |
| 710 | |
| 711 truncate_at++; | |
| 712 | |
| 713 if (truncate_at <= min_len || truncate_at > (max_len - static_cast<int>(TSTR
_SIZE(kEllipsis)))) { | |
| 714 // we weren't able to break at a word boundary, may as well use more of th
e string | |
| 715 truncate_at = max_len - TSTR_SIZE(kEllipsis); | |
| 716 | |
| 717 // skip space(s) | |
| 718 while (truncate_at > 0 && IsSpace(s.GetAt(truncate_at-1))) | |
| 719 truncate_at--; | |
| 720 } | |
| 721 | |
| 722 s = s.Left(truncate_at); | |
| 723 s += kEllipsis; | |
| 724 } | |
| 725 | |
| 726 UTIL_LOG(L6, (L"elide (%d %d) %s -> %s", min_len, max_len, input_string, s)); | |
| 727 return s; | |
| 728 } | |
| 729 | |
| 730 // these functions untested | |
| 731 // UTF8 parameter supported on XP/2000 only | |
| 732 HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len) { | |
| 733 ASSERT (dest_len, (L"")); | |
| 734 ASSERT (dest, (L"")); | |
| 735 ASSERT (src, (L"")); | |
| 736 | |
| 737 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode | |
| 738 // then use WideCharToMultiByte to convert from Unicode to UTF8 | |
| 739 WCHAR *unicode = new WCHAR [(src_len + 1) * sizeof (TCHAR)]; ASSERT (unicode,
(L"")); | |
| 740 int chars_written = MultiByteToWideChar (CP_ACP, 0, src, src_len, unicode, src
_len); | |
| 741 ASSERT (chars_written == src_len, (L"")); | |
| 742 char *unmappable = " "; | |
| 743 BOOL unmappable_characters = false; | |
| 744 *dest_len = WideCharToMultiByte (CP_UTF8, 0, unicode, chars_written, dest, *de
st_len, unmappable, &unmappable_characters); | |
| 745 delete [] unicode; | |
| 746 return S_OK; | |
| 747 } | |
| 748 | |
| 749 // Convert Wide to ANSI directly. Use only when it is all ANSI | |
| 750 CStringA WideToAnsiDirect(const CString & in) { | |
| 751 int in_len = in.GetLength(); | |
| 752 const TCHAR * in_buf = static_cast<const TCHAR*>(in.GetString()); | |
| 753 | |
| 754 CStringA out; | |
| 755 unsigned char * out_buf = (unsigned char *)out.GetBufferSetLength(in_len); | |
| 756 | |
| 757 for(int i = 0; i < in_len; ++i) | |
| 758 out_buf[i] = static_cast<unsigned char>(in_buf[i]); | |
| 759 | |
| 760 out.ReleaseBuffer(in_len); | |
| 761 return out; | |
| 762 } | |
| 763 | |
| 764 HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len) { | |
| 765 ASSERT(dest_len, (L"")); | |
| 766 ASSERT(dest, (L"")); | |
| 767 | |
| 768 *dest_len = WideCharToMultiByte (CP_UTF8, 0, src, src_len, dest, *dest_len, NU
LL,NULL); | |
| 769 return S_OK; | |
| 770 } | |
| 771 | |
| 772 HRESULT UTF8ToUCS2 (const char * src, int src_len, LPWSTR dest, int *dest_len) { | |
| 773 ASSERT (dest_len, (L"")); | |
| 774 ASSERT (src, (L"")); | |
| 775 | |
| 776 *dest_len = MultiByteToWideChar (CP_UTF8, 0, src, src_len, dest, *dest_len); | |
| 777 ASSERT (*dest_len == src_len, (L"")); | |
| 778 return S_OK; | |
| 779 } | |
| 780 | |
| 781 HRESULT UTF8ToAnsi (char * src, int, char * dest, int *dest_len) { | |
| 782 ASSERT(dest_len, (L"")); | |
| 783 ASSERT(dest, (L"")); | |
| 784 ASSERT(src, (L"")); | |
| 785 | |
| 786 src; dest; dest_len; // unreferenced formal parameter | |
| 787 | |
| 788 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode | |
| 789 // then use WideCharToMultiByte to convert from Unicode to ANSI | |
| 790 return E_FAIL; | |
| 791 } | |
| 792 | |
| 793 // clean up a string so it can be included within a JavaScript string | |
| 794 // mainly involves escaping characters | |
| 795 CString SanitizeString(const CString & in, DWORD mode) { | |
| 796 CString out(in); | |
| 797 | |
| 798 if (mode & kSanHtml) { | |
| 799 // SKIP_LOC_BEGIN | |
| 800 ReplaceCString(out, _T("&"), _T("&")); | |
| 801 ReplaceCString(out, _T("<"), _T("<")); | |
| 802 ReplaceCString(out, _T(">"), _T(">")); | |
| 803 // SKIP_LOC_END | |
| 804 } | |
| 805 | |
| 806 if ((mode & kSanXml) == kSanXml) { | |
| 807 // SKIP_LOC_BEGIN | |
| 808 ReplaceCString(out, _T("'"), _T("'")); | |
| 809 ReplaceCString(out, _T("\""), _T(""")); | |
| 810 // SKIP_LOC_END | |
| 811 } | |
| 812 | |
| 813 // Note that this SAN_JAVASCRIPT and kSanXml should not be used together. | |
| 814 ASSERT ((mode & (kSanJs | kSanXml)) != (kSanJs | kSanXml), (L"")); | |
| 815 | |
| 816 if ((mode & kSanJs) == kSanJs) { | |
| 817 // SKIP_LOC_BEGIN | |
| 818 ReplaceCString(out, _T("\\"), _T("\\\\")); | |
| 819 ReplaceCString(out, _T("\'"), _T("\\\'")); | |
| 820 ReplaceCString(out, _T("\""), _T("\\\"")); | |
| 821 ReplaceCString(out, _T("\n"), _T(" ")); | |
| 822 ReplaceCString(out, _T("\t"), _T(" ")); | |
| 823 // SKIP_LOC_END | |
| 824 } | |
| 825 | |
| 826 if ((mode & kSanHtmlInput) == kSanHtmlInput) { | |
| 827 // SKIP_LOC_BEGIN | |
| 828 ReplaceCString(out, _T("\""), _T(""")); | |
| 829 ReplaceCString(out, _T("'"), _T("'")); | |
| 830 // SKIP_LOC_END | |
| 831 } | |
| 832 | |
| 833 return out; | |
| 834 } | |
| 835 | |
| 836 // Bolds the periods used for abbreviation. Call this after HighlightTerms. | |
| 837 CString BoldAbbreviationPeriods(const CString & in) { | |
| 838 CString out(in); | |
| 839 CString abbrev; | |
| 840 for (int i = 0; i < kAbbreviationPeriodLength; ++i) | |
| 841 abbrev += _T("."); | |
| 842 ReplaceCString(out, abbrev, NOTRANSL(_T("<b>")) + abbrev + NOTRANSL(_T("</b>")
)); | |
| 843 return out; | |
| 844 } | |
| 845 | |
| 846 // Unescape a escaped sequence leading by a percentage symbol '%', | |
| 847 // and converted the unescaped sequence (in UTF8) into unicode. | |
| 848 // Inputs: src is the input string. | |
| 849 // pos is the starting position. | |
| 850 // Returns: true if a EOS(null) char was encounted. | |
| 851 // out contains the unescaped and converted unicode string. | |
| 852 // consumed_length is how many bytes in the src string have been | |
| 853 // unescaped. | |
| 854 // We can avoid the expensive UTF8 conversion step if there are no higher | |
| 855 // ansi characters So if there aren't any, just convert it ANSI-to-WIDE | |
| 856 // directly, which is cheaper. | |
| 857 inline bool UnescapeSequence(const CString &src, int pos, | |
| 858 CStringW *out, int *consumed_length) { | |
| 859 ASSERT1(out); | |
| 860 ASSERT1(consumed_length); | |
| 861 | |
| 862 int length = src.GetLength(); | |
| 863 // (input_len - pos) / 3 is enough for un-escaping the (%xx)+ sequences. | |
| 864 int max_dst_length = (length - pos) / 3; | |
| 865 scoped_array<char> unescaped(new char[max_dst_length]); | |
| 866 char *buf = unescaped.get(); | |
| 867 if (buf == NULL) { // no enough space ??? | |
| 868 *consumed_length = 0; | |
| 869 return false; | |
| 870 } | |
| 871 char *dst = buf; | |
| 872 bool is_utf8 = false; | |
| 873 // It is possible that there is a null character '\0' in the sequence. | |
| 874 // Because the CStringT does't support '\0' in it, we stop | |
| 875 // parsing the input string when it is encounted. | |
| 876 bool eos_encounted = false; | |
| 877 uint8 ch; | |
| 878 int s = pos; | |
| 879 while (s + 2 < length && src[s] == '%' && !eos_encounted && | |
| 880 ExtractChar(src, s + 1, &ch)) { | |
| 881 if (ch != 0) | |
| 882 *dst++ = ch; | |
| 883 else | |
| 884 eos_encounted = true; | |
| 885 if (ch >= 128) | |
| 886 is_utf8 = true; | |
| 887 s += 3; | |
| 888 } | |
| 889 | |
| 890 ASSERT1(dst <= buf + max_dst_length); // just to make sure | |
| 891 | |
| 892 *consumed_length = s - pos; | |
| 893 if (is_utf8) | |
| 894 AnsiToWideString(buf, dst - buf, CP_UTF8, out); | |
| 895 else | |
| 896 *out = AnsiToWideString(buf, dst - buf); | |
| 897 return eos_encounted; | |
| 898 } | |
| 899 | |
| 900 // There is an encoding called "URL-encoding". This function takes a URL-encoded
string | |
| 901 // and converts it back to the original representation | |
| 902 // example: "?q=moon+doggy_%25%5E%26&" = "moon doggy_%^&" | |
| 903 CString Unencode(const CString &input) { | |
| 904 const int input_len = input.GetLength(); | |
| 905 const TCHAR *src = input.GetString(); | |
| 906 // input_len is enough for containing the unencoded string. | |
| 907 CString out; | |
| 908 TCHAR *head = out.GetBuffer(input_len); | |
| 909 TCHAR *dst = head; | |
| 910 int s = 0; | |
| 911 bool eos_encounted = false; | |
| 912 bool is_utf8 = false; | |
| 913 CStringW fragment; | |
| 914 int consumed_length = 0; | |
| 915 while (s < input_len && !eos_encounted) { | |
| 916 switch (src[s]) { | |
| 917 case '+' : | |
| 918 *dst++ = ' '; | |
| 919 ASSERT1(dst <= head + input_len); | |
| 920 ++s; | |
| 921 break; | |
| 922 case '%' : | |
| 923 eos_encounted = | |
| 924 UnescapeSequence(input, s, &fragment, &consumed_length); | |
| 925 if (consumed_length > 0) { | |
| 926 s += consumed_length; | |
| 927 ASSERT1(dst + fragment.GetLength() <= head + input_len); | |
| 928 for (int i = 0; i < fragment.GetLength(); ++i) | |
| 929 *dst++ = fragment[i]; | |
| 930 } else { | |
| 931 *dst++ = src[s++]; | |
| 932 ASSERT1(dst <= head + input_len); | |
| 933 } | |
| 934 break; | |
| 935 default: | |
| 936 *dst++ = src[s]; | |
| 937 ASSERT1(dst <= head + input_len); | |
| 938 ++s; | |
| 939 } | |
| 940 } | |
| 941 int out_len = dst - head; | |
| 942 out.ReleaseBuffer(out_len); | |
| 943 return out; | |
| 944 } | |
| 945 | |
| 946 CString GetTextInbetween(const CString &input, const CString &start, const CStri
ng &end) { | |
| 947 int start_index = String_FindString(input, start); | |
| 948 if (start_index == -1) | |
| 949 return L""; | |
| 950 | |
| 951 start_index += start.GetLength(); | |
| 952 int end_index = String_FindString(input, end, start_index); | |
| 953 if (end_index == -1) | |
| 954 return L""; | |
| 955 | |
| 956 return input.Mid(start_index, end_index - start_index); | |
| 957 } | |
| 958 | |
| 959 // Given a string, get the parameter and url-unencode it | |
| 960 CString GetParam(const CString & input, const CString & key) { | |
| 961 CString my_key(_T("?")); | |
| 962 my_key.Append(key); | |
| 963 my_key += L'='; | |
| 964 | |
| 965 return Unencode(GetTextInbetween(input, my_key, NOTRANSL(L"?"))); | |
| 966 } | |
| 967 | |
| 968 // Get an xml-like field from a string | |
| 969 CString GetField (const CString & input, const CString & field) { | |
| 970 CString start_field(NOTRANSL(_T("<"))); | |
| 971 start_field += field; | |
| 972 start_field += L'>'; | |
| 973 | |
| 974 int32 start = String_FindString(input, start_field); | |
| 975 if (start == -1) { return _T(""); } | |
| 976 start += 2 + lstrlen (field); | |
| 977 | |
| 978 CString end_field(NOTRANSL(_T("</"))); | |
| 979 end_field += field; | |
| 980 end_field += L'>'; | |
| 981 | |
| 982 int32 end = String_FindString(input, end_field); | |
| 983 if (end == -1) { return _T(""); } | |
| 984 | |
| 985 return input.Mid (start, end - start); | |
| 986 } | |
| 987 | |
| 988 // ------------------------------------------------------------ | |
| 989 // Finds a whole word match in the query. | |
| 990 // If the word has non-spaces either before or after, it will not qualify as | |
| 991 // a match. i.e. "pie!" is not a match because of the exclamation point. | |
| 992 // TODO(omaha): Add parameter that will consider punctuation acceptable. | |
| 993 // | |
| 994 // Optionally will look for a colon at the end. | |
| 995 // If not found, return -1. | |
| 996 int FindWholeWordMatch (const CString &query, | |
| 997 const CString &word_to_match, | |
| 998 const bool end_with_colon, | |
| 999 const int index_begin) { | |
| 1000 if (word_to_match.IsEmpty()) { | |
| 1001 return -1; | |
| 1002 } | |
| 1003 | |
| 1004 int index_word_begin = index_begin; | |
| 1005 | |
| 1006 // Keep going until we find a whole word match, or the string ends. | |
| 1007 do { | |
| 1008 index_word_begin = String_FindString (query, word_to_match, index_word_begin
); | |
| 1009 | |
| 1010 if (-1 == index_word_begin) { | |
| 1011 return index_word_begin; | |
| 1012 } | |
| 1013 | |
| 1014 // If it's not a whole word match, keep going. | |
| 1015 if (index_word_begin > 0 && | |
| 1016 !IsSpaceW (query[index_word_begin - 1])) { | |
| 1017 goto LoopEnd; | |
| 1018 } | |
| 1019 | |
| 1020 if (end_with_colon) { | |
| 1021 int index_colon = String_FindChar (query, L':', index_word_begin); | |
| 1022 | |
| 1023 // If there is no colon in the string, return now. | |
| 1024 if (-1 == index_colon) { | |
| 1025 return -1; | |
| 1026 } | |
| 1027 | |
| 1028 // If there is text between the end of the word and the colon, keep going. | |
| 1029 if (index_colon - index_word_begin != word_to_match.GetLength()) { | |
| 1030 goto LoopEnd; | |
| 1031 } | |
| 1032 } else { | |
| 1033 // If there are more chars left after this word/phrase, and | |
| 1034 // they are not spaces, return. | |
| 1035 if (query.GetLength() > index_word_begin + word_to_match.GetLength() && | |
| 1036 !IsSpaceW (query.GetAt (index_word_begin + word_to_match.GetLength())))
{ | |
| 1037 goto LoopEnd; | |
| 1038 } | |
| 1039 } | |
| 1040 | |
| 1041 // It fits all the requirements, so return the index to the beginning of the
word. | |
| 1042 return index_word_begin; | |
| 1043 | |
| 1044 LoopEnd: | |
| 1045 ++index_word_begin; | |
| 1046 | |
| 1047 } while (-1 != index_word_begin); | |
| 1048 | |
| 1049 return index_word_begin; | |
| 1050 } | |
| 1051 | |
| 1052 // -------------------------------------------------------- | |
| 1053 // Do whole-word replacement in "str". | |
| 1054 void ReplaceWholeWord (const CString &string_to_replace, | |
| 1055 const CString &replacement, | |
| 1056 const bool trim_whitespace, | |
| 1057 CString *str) { | |
| 1058 ASSERT (str, (L"ReplaceWholeWord")); | |
| 1059 | |
| 1060 if (string_to_replace.IsEmpty() || str->IsEmpty()) { | |
| 1061 return; | |
| 1062 } | |
| 1063 | |
| 1064 int index_str = 0; | |
| 1065 do { | |
| 1066 index_str = FindWholeWordMatch (*str, string_to_replace, false, index_str); | |
| 1067 | |
| 1068 if (-1 != index_str) { | |
| 1069 // Get the strings before and after, and trim whitespace. | |
| 1070 CString str_before_word(str->Left (index_str)); | |
| 1071 if (trim_whitespace) { | |
| 1072 str_before_word.TrimRight(); | |
| 1073 } | |
| 1074 | |
| 1075 CString str_after_word(str->Mid (index_str + string_to_replace.GetLength()
)); | |
| 1076 if (trim_whitespace) { | |
| 1077 str_after_word.TrimLeft(); | |
| 1078 } | |
| 1079 | |
| 1080 *str = str_before_word + replacement + str_after_word; | |
| 1081 index_str += replacement.GetLength() + 1; | |
| 1082 } | |
| 1083 } while (index_str != -1); | |
| 1084 } | |
| 1085 | |
| 1086 // -------------------------------------------------------- | |
| 1087 // Reverse (big-endian<->little-endian) the shorts that make up | |
| 1088 // Unicode characters in a byte array of Unicode chars | |
| 1089 HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes) { | |
| 1090 ASSERT (unicode_string, (L"")); | |
| 1091 | |
| 1092 // If odd # of bytes, just leave the last one alone | |
| 1093 for (int i = 0; i < size_in_bytes - 1; i += 2) { | |
| 1094 byte b = unicode_string[i]; | |
| 1095 unicode_string[i] = unicode_string[i+1]; | |
| 1096 unicode_string[i+1] = b; | |
| 1097 } | |
| 1098 | |
| 1099 return S_OK; | |
| 1100 } | |
| 1101 | |
| 1102 // case insensitive strstr | |
| 1103 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
| 1104 const char *stristr(const char *string, const char *pattern) | |
| 1105 { | |
| 1106 ASSERT (pattern, (L"")); | |
| 1107 ASSERT (string, (L"")); | |
| 1108 ASSERT (string && pattern, (L"")); | |
| 1109 char *pattern_ptr, *string_ptr; | |
| 1110 const char *start; | |
| 1111 | |
| 1112 for (start = string; *start != 0; start++) | |
| 1113 { | |
| 1114 // find start of pattern in string | |
| 1115 for ( ; ((*start!=0) && (String_ToUpperA(*start) != String_ToUpperA(*pattern
))); start++) | |
| 1116 ; | |
| 1117 if (0 == *start) | |
| 1118 return NULL; | |
| 1119 | |
| 1120 pattern_ptr = (char *)pattern; | |
| 1121 string_ptr = (char *)start; | |
| 1122 | |
| 1123 while (String_ToUpperA(*string_ptr) == String_ToUpperA(*pattern_ptr)) | |
| 1124 { | |
| 1125 string_ptr++; | |
| 1126 pattern_ptr++; | |
| 1127 | |
| 1128 // if end of pattern then pattern was found | |
| 1129 if (0 == *pattern_ptr) | |
| 1130 return (start); | |
| 1131 } | |
| 1132 } | |
| 1133 | |
| 1134 return NULL; | |
| 1135 } | |
| 1136 | |
| 1137 // case insensitive Unicode strstr | |
| 1138 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
| 1139 const WCHAR *stristrW(const WCHAR *string, const WCHAR *pattern) | |
| 1140 { | |
| 1141 ASSERT (pattern, (L"")); | |
| 1142 ASSERT (string, (L"")); | |
| 1143 ASSERT (string && pattern, (L"")); | |
| 1144 const WCHAR *start; | |
| 1145 | |
| 1146 for (start = string; *start != 0; start++) | |
| 1147 { | |
| 1148 // find start of pattern in string | |
| 1149 for ( ; ((*start!=0) && (String_ToUpper(*start) != String_ToUpper(*pattern))
); start++) | |
| 1150 ; | |
| 1151 if (0 == *start) | |
| 1152 return NULL; | |
| 1153 | |
| 1154 const WCHAR *pattern_ptr = pattern; | |
| 1155 const WCHAR *string_ptr = start; | |
| 1156 | |
| 1157 while (String_ToUpper(*string_ptr) == String_ToUpper(*pattern_ptr)) | |
| 1158 { | |
| 1159 string_ptr++; | |
| 1160 pattern_ptr++; | |
| 1161 | |
| 1162 // if end of pattern then pattern was found | |
| 1163 if (0 == *pattern_ptr) | |
| 1164 return (start); | |
| 1165 } | |
| 1166 } | |
| 1167 | |
| 1168 return NULL; | |
| 1169 } | |
| 1170 | |
| 1171 // case sensitive Unicode strstr | |
| 1172 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
| 1173 const WCHAR *strstrW(const WCHAR *string, const WCHAR *pattern) | |
| 1174 { | |
| 1175 ASSERT (pattern, (L"")); | |
| 1176 ASSERT (string, (L"")); | |
| 1177 ASSERT (string && pattern, (L"")); | |
| 1178 const WCHAR *start; | |
| 1179 | |
| 1180 for (start = string; *start != 0; start++) | |
| 1181 { | |
| 1182 // find start of pattern in string | |
| 1183 for ( ; ((*start!=0) && (*start != *pattern)); start++) | |
| 1184 ; | |
| 1185 if (0 == *start) | |
| 1186 return NULL; | |
| 1187 | |
| 1188 const WCHAR *pattern_ptr = pattern; | |
| 1189 const WCHAR *string_ptr = start; | |
| 1190 | |
| 1191 while (*string_ptr == *pattern_ptr) | |
| 1192 { | |
| 1193 string_ptr++; | |
| 1194 pattern_ptr++; | |
| 1195 | |
| 1196 // if end of pattern then pattern was found | |
| 1197 if (0 == *pattern_ptr) | |
| 1198 return (start); | |
| 1199 } | |
| 1200 } | |
| 1201 | |
| 1202 return NULL; | |
| 1203 } | |
| 1204 | |
| 1205 // ------------------------------------------------------------------------- | |
| 1206 // Helper function | |
| 1207 float GetLenWithWordWrap (const float len_so_far, | |
| 1208 const float len_to_add, | |
| 1209 const uint32 len_line) { | |
| 1210 // lint -save -e414 Possible division by 0 | |
| 1211 ASSERT (len_line != 0, (L"")); | |
| 1212 | |
| 1213 float len_total = len_so_far + len_to_add; | |
| 1214 | |
| 1215 // Figure out if we need to word wrap by seeing if adding the second | |
| 1216 // string will cause us to span more lines than before. | |
| 1217 uint32 num_lines_before = static_cast<uint32> (len_so_far / len_line); | |
| 1218 uint32 num_lines_after = static_cast<uint32> (len_total / len_line); | |
| 1219 | |
| 1220 // If it just barely fit onto the line, do not wrap to the next line. | |
| 1221 if (num_lines_after > 0 && (len_total / len_line - num_lines_after == 0)) { | |
| 1222 --num_lines_after; | |
| 1223 } | |
| 1224 | |
| 1225 if (num_lines_after > num_lines_before) { | |
| 1226 // Need to word wrap. | |
| 1227 // lint -e{790} Suspicious truncation | |
| 1228 return num_lines_after * len_line + len_to_add; | |
| 1229 } | |
| 1230 else | |
| 1231 return len_total; | |
| 1232 | |
| 1233 // lint -restore | |
| 1234 } | |
| 1235 | |
| 1236 int CalculateBase64EscapedLen(int input_len, bool do_padding) { | |
| 1237 // these formulae were copied from comments that used to go with the base64 | |
| 1238 // encoding functions | |
| 1239 int intermediate_result = 8 * input_len + 5; | |
| 1240 ASSERT(intermediate_result > 0,(L"")); // make sure we didn't overflow | |
| 1241 int len = intermediate_result / 6; | |
| 1242 if (do_padding) len = ((len + 3) / 4) * 4; | |
| 1243 return len; | |
| 1244 } | |
| 1245 | |
| 1246 // Base64Escape does padding, so this calculation includes padding. | |
| 1247 int CalculateBase64EscapedLen(int input_len) { | |
| 1248 return CalculateBase64EscapedLen(input_len, true); | |
| 1249 } | |
| 1250 | |
| 1251 // Base64Escape | |
| 1252 // Largely based on b2a_base64 in google/docid_encryption.c | |
| 1253 // | |
| 1254 // | |
| 1255 int Base64EscapeInternal(const char *src, int szsrc, | |
| 1256 char *dest, int szdest, const char *base64, | |
| 1257 bool do_padding) | |
| 1258 { | |
| 1259 ASSERT(base64, (L"")); | |
| 1260 ASSERT(dest, (L"")); | |
| 1261 ASSERT(src, (L"")); | |
| 1262 | |
| 1263 static const char kPad64 = '='; | |
| 1264 | |
| 1265 if (szsrc <= 0) return 0; | |
| 1266 | |
| 1267 char *cur_dest = dest; | |
| 1268 const unsigned char *cur_src = reinterpret_cast<const unsigned char*>(src); | |
| 1269 | |
| 1270 // Three bytes of data encodes to four characters of cyphertext. | |
| 1271 // So we can pump through three-byte chunks atomically. | |
| 1272 while (szsrc > 2) { /* keep going until we have less than 24 bits */ | |
| 1273 if( (szdest -= 4) < 0 ) return 0; | |
| 1274 cur_dest[0] = base64[cur_src[0] >> 2]; | |
| 1275 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; | |
| 1276 cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)]; | |
| 1277 cur_dest[3] = base64[cur_src[2] & 0x3f]; | |
| 1278 | |
| 1279 cur_dest += 4; | |
| 1280 cur_src += 3; | |
| 1281 szsrc -= 3; | |
| 1282 } | |
| 1283 | |
| 1284 /* now deal with the tail (<=2 bytes) */ | |
| 1285 switch (szsrc) { | |
| 1286 case 0: | |
| 1287 // Nothing left; nothing more to do. | |
| 1288 break; | |
| 1289 case 1: | |
| 1290 // One byte left: this encodes to two characters, and (optionally) | |
| 1291 // two pad characters to round out the four-character cypherblock. | |
| 1292 if( (szdest -= 2) < 0 ) return 0; | |
| 1293 cur_dest[0] = base64[cur_src[0] >> 2]; | |
| 1294 cur_dest[1] = base64[(cur_src[0] & 0x03) << 4]; | |
| 1295 cur_dest += 2; | |
| 1296 if (do_padding) { | |
| 1297 if( (szdest -= 2) < 0 ) return 0; | |
| 1298 cur_dest[0] = kPad64; | |
| 1299 cur_dest[1] = kPad64; | |
| 1300 cur_dest += 2; | |
| 1301 } | |
| 1302 break; | |
| 1303 case 2: | |
| 1304 // Two bytes left: this encodes to three characters, and (optionally) | |
| 1305 // one pad character to round out the four-character cypherblock. | |
| 1306 if( (szdest -= 3) < 0 ) return 0; | |
| 1307 cur_dest[0] = base64[cur_src[0] >> 2]; | |
| 1308 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; | |
| 1309 cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2]; | |
| 1310 cur_dest += 3; | |
| 1311 if (do_padding) { | |
| 1312 if( (szdest -= 1) < 0 ) return 0; | |
| 1313 cur_dest[0] = kPad64; | |
| 1314 cur_dest += 1; | |
| 1315 } | |
| 1316 break; | |
| 1317 default: | |
| 1318 // Should not be reached: blocks of 3 bytes are handled | |
| 1319 // in the while loop before this switch statement. | |
| 1320 ASSERT(false, (L"Logic problem? szsrc = %S",szsrc)); | |
| 1321 break; | |
| 1322 } | |
| 1323 return (cur_dest - dest); | |
| 1324 } | |
| 1325 | |
// Standard base64 alphabet: value i (0-63) is encoded as the i-th character.
#define kBase64Chars \
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
    "abcdefghijklmnopqrstuvwxyz" \
    "0123456789+/"

// Web-safe variant: '-' and '_' replace '+' and '/'.
#define kWebSafeBase64Chars \
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
    "abcdefghijklmnopqrstuvwxyz" \
    "0123456789-_"
| 1329 | |
| 1330 int Base64Escape(const char *src, int szsrc, char *dest, int szdest) { | |
| 1331 ASSERT(dest, (L"")); | |
| 1332 ASSERT(src, (L"")); | |
| 1333 | |
| 1334 return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true); | |
| 1335 } | |
| 1336 int WebSafeBase64Escape(const char *src, int szsrc, char *dest, | |
| 1337 int szdest, bool do_padding) { | |
| 1338 ASSERT(dest, (L"")); | |
| 1339 ASSERT(src, (L"")); | |
| 1340 | |
| 1341 return Base64EscapeInternal(src, szsrc, dest, szdest, | |
| 1342 kWebSafeBase64Chars, do_padding); | |
| 1343 } | |
| 1344 | |
| 1345 void Base64Escape(const char *src, int szsrc, | |
| 1346 CStringA* dest, bool do_padding) | |
| 1347 { | |
| 1348 ASSERT(src, (L"")); | |
| 1349 ASSERT(dest,(L"")); | |
| 1350 const int max_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding); | |
| 1351 dest->Empty(); | |
| 1352 const int escaped_len = Base64EscapeInternal(src, szsrc, | |
| 1353 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1, | |
| 1354 kBase64Chars, | |
| 1355 do_padding); | |
| 1356 ASSERT(max_escaped_size <= escaped_len,(L"")); | |
| 1357 dest->ReleaseBuffer(escaped_len); | |
| 1358 } | |
| 1359 | |
| 1360 void WebSafeBase64Escape(const char *src, int szsrc, | |
| 1361 CStringA *dest, bool do_padding) | |
| 1362 { | |
| 1363 ASSERT(src, (L"")); | |
| 1364 ASSERT(dest,(L"")); | |
| 1365 const int max_escaped_size = | |
| 1366 CalculateBase64EscapedLen(szsrc, do_padding); | |
| 1367 dest->Empty(); | |
| 1368 const int escaped_len = Base64EscapeInternal(src, szsrc, | |
| 1369 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1, | |
| 1370 kWebSafeBase64Chars, | |
| 1371 do_padding); | |
| 1372 ASSERT(max_escaped_size <= escaped_len,(L"")); | |
| 1373 dest->ReleaseBuffer(escaped_len); | |
| 1374 } | |
| 1375 | |
| 1376 void WebSafeBase64Escape(const CStringA& src, CStringA* dest) { | |
| 1377 ASSERT(dest,(L"")); | |
| 1378 int encoded_len = CalculateBase64EscapedLen(src.GetLength()); | |
| 1379 scoped_array<char> buf(new char[encoded_len]); | |
| 1380 int len = WebSafeBase64Escape(src,src.GetLength(), buf.get(), encoded_len, fal
se); | |
| 1381 dest->SetString(buf.get(), len); | |
| 1382 } | |
| 1383 | |
| 1384 // ---------------------------------------------------------------------- | |
| 1385 // int Base64Unescape() - base64 decoder | |
| 1386 // | |
| 1387 // Check out | |
| 1388 // http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for formal | |
| 1389 // description, but what we care about is that... | |
| 1390 // Take the encoded stuff in groups of 4 characters and turn each | |
| 1391 // character into a code 0 to 63 thus: | |
| 1392 // A-Z map to 0 to 25 | |
| 1393 // a-z map to 26 to 51 | |
| 1394 // 0-9 map to 52 to 61 | |
| 1395 // +(- for WebSafe) maps to 62 | |
| 1396 // /(_ for WebSafe) maps to 63 | |
| 1397 // There will be four numbers, all less than 64 which can be represented | |
| 1398 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). | |
| 1399 // Arrange the 6 digit binary numbers into three bytes as such: | |
| 1400 // aaaaaabb bbbbcccc ccdddddd | |
| 1401 // Equals signs (one or two) are used at the end of the encoded block to | |
| 1402 // indicate that the text was not an integer multiple of three bytes long. | |
| 1403 // ---------------------------------------------------------------------- | |
| 1404 int Base64UnescapeInternal(const char *src, int len_src, | |
| 1405 char *dest, int len_dest, const char* unbase64) { | |
| 1406 ASSERT (unbase64, (L"")); | |
| 1407 ASSERT (src, (L"")); | |
| 1408 | |
| 1409 static const char kPad64 = '='; | |
| 1410 | |
| 1411 int decode; | |
| 1412 int destidx = 0; | |
| 1413 int state = 0; | |
| 1414 // Used an unsigned char, since ch is used as an array index (into unbase64). | |
| 1415 unsigned char ch = 0; | |
| 1416 while (len_src-- && (ch = *src++) != '\0') { | |
| 1417 if (IsSpaceA(ch)) // Skip whitespace | |
| 1418 continue; | |
| 1419 | |
| 1420 if (ch == kPad64) | |
| 1421 break; | |
| 1422 | |
| 1423 decode = unbase64[ch]; | |
| 1424 if (decode == 99) // A non-base64 character | |
| 1425 return (-1); | |
| 1426 | |
| 1427 // Four cyphertext characters decode to three bytes. | |
| 1428 // Therefore we can be in one of four states. | |
| 1429 switch (state) { | |
| 1430 case 0: | |
| 1431 // We're at the beginning of a four-character cyphertext block. | |
| 1432 // This sets the high six bits of the first byte of the | |
| 1433 // plaintext block. | |
| 1434 if (dest) { | |
| 1435 if (destidx >= len_dest) | |
| 1436 return (-1); | |
| 1437 // lint -e{734} Loss of precision | |
| 1438 dest[destidx] = static_cast<char>(decode << 2); | |
| 1439 } | |
| 1440 state = 1; | |
| 1441 break; | |
| 1442 case 1: | |
| 1443 // We're one character into a four-character cyphertext block. | |
| 1444 // This sets the low two bits of the first plaintext byte, | |
| 1445 // and the high four bits of the second plaintext byte. | |
| 1446 // However, if this is the end of data, and those four | |
| 1447 // bits are zero, it could be that those four bits are | |
| 1448 // leftovers from the encoding of data that had a length | |
| 1449 // of one mod three. | |
| 1450 if (dest) { | |
| 1451 if (destidx >= len_dest) | |
| 1452 return (-1); | |
| 1453 // lint -e{734} Loss of precision | |
| 1454 dest[destidx] |= decode >> 4; | |
| 1455 if (destidx + 1 >= len_dest) { | |
| 1456 if (0 != (decode & 0x0f)) | |
| 1457 return (-1); | |
| 1458 else | |
| 1459 ; | |
| 1460 } else { | |
| 1461 // lint -e{734} Loss of precision | |
| 1462 dest[destidx+1] = static_cast<char>((decode & 0x0f) << 4); | |
| 1463 } | |
| 1464 } | |
| 1465 destidx++; | |
| 1466 state = 2; | |
| 1467 break; | |
| 1468 case 2: | |
| 1469 // We're two characters into a four-character cyphertext block. | |
| 1470 // This sets the low four bits of the second plaintext | |
| 1471 // byte, and the high two bits of the third plaintext byte. | |
| 1472 // However, if this is the end of data, and those two | |
| 1473 // bits are zero, it could be that those two bits are | |
| 1474 // leftovers from the encoding of data that had a length | |
| 1475 // of two mod three. | |
| 1476 if (dest) { | |
| 1477 if (destidx >= len_dest) | |
| 1478 return (-1); | |
| 1479 // lint -e{734} Loss of precision | |
| 1480 dest[destidx] |= decode >> 2; | |
| 1481 if (destidx +1 >= len_dest) { | |
| 1482 if (0 != (decode & 0x03)) | |
| 1483 return (-1); | |
| 1484 else | |
| 1485 ; | |
| 1486 } else { | |
| 1487 // lint -e{734} Loss of precision | |
| 1488 dest[destidx+1] = static_cast<char>((decode & 0x03) << 6); | |
| 1489 } | |
| 1490 } | |
| 1491 destidx++; | |
| 1492 state = 3; | |
| 1493 break; | |
| 1494 case 3: | |
| 1495 // We're at the last character of a four-character cyphertext block. | |
| 1496 // This sets the low six bits of the third plaintext byte. | |
| 1497 if (dest) { | |
| 1498 if (destidx >= len_dest) | |
| 1499 return (-1); | |
| 1500 // lint -e{734} Loss of precision | |
| 1501 dest[destidx] |= decode; | |
| 1502 } | |
| 1503 destidx++; | |
| 1504 state = 0; | |
| 1505 break; | |
| 1506 | |
| 1507 default: | |
| 1508 ASSERT (false, (L"")); | |
| 1509 break; | |
| 1510 } | |
| 1511 } | |
| 1512 | |
| 1513 // We are done decoding Base-64 chars. Let's see if we ended | |
| 1514 // on a byte boundary, and/or with erroneous trailing characters. | |
| 1515 if (ch == kPad64) { // We got a pad char | |
| 1516 if ((state == 0) || (state == 1)) | |
| 1517 return (-1); // Invalid '=' in first or second position | |
| 1518 if (len_src == 0) { | |
| 1519 if (state == 2) // We run out of input but we still need another '=' | |
| 1520 return (-1); | |
| 1521 // Otherwise, we are in state 3 and only need this '=' | |
| 1522 } else { | |
| 1523 if (state == 2) { // need another '=' | |
| 1524 while ((ch = *src++) != '\0' && (len_src-- > 0)) { | |
| 1525 if (!IsSpaceA(ch)) | |
| 1526 break; | |
| 1527 } | |
| 1528 if (ch != kPad64) | |
| 1529 return (-1); | |
| 1530 } | |
| 1531 // state = 1 or 2, check if all remain padding is space | |
| 1532 while ((ch = *src++) != '\0' && (len_src-- > 0)) { | |
| 1533 if (!IsSpaceA(ch)) | |
| 1534 return(-1); | |
| 1535 } | |
| 1536 } | |
| 1537 } else { | |
| 1538 // We ended by seeing the end of the string. Make sure we | |
| 1539 // have no partial bytes lying around. Note that we | |
| 1540 // do not require trailing '=', so states 2 and 3 are okay too. | |
| 1541 if (state == 1) | |
| 1542 return (-1); | |
| 1543 } | |
| 1544 | |
| 1545 return (destidx); | |
| 1546 } | |
| 1547 | |
| 1548 int Base64Unescape(const char *src, int len_src, char *dest, int len_dest) { | |
| 1549 ASSERT(dest, (L"")); | |
| 1550 ASSERT(src, (L"")); | |
| 1551 | |
| 1552 static const char UnBase64[] = { | |
| 1553 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1554 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1555 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1556 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1557 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1558 99, 99, 99, 62/*+*/, 99, 99, 99, 63/*/ */, | |
| 1559 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, | |
| 1560 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99, | |
| 1561 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, | |
| 1562 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, | |
| 1563 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, | |
| 1564 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 99, | |
| 1565 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, | |
| 1566 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, | |
| 1567 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, | |
| 1568 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99, | |
| 1569 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1570 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1571 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1572 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1573 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1574 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1575 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1576 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1577 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1578 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1579 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1580 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1581 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1582 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1583 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1584 99, 99, 99, 99, 99, 99, 99, 99 | |
| 1585 }; | |
| 1586 | |
| 1587 // The above array was generated by the following code | |
| 1588 // #include <sys/time.h> | |
| 1589 // #include <stdlib.h> | |
| 1590 // #include <string.h> | |
| 1591 // main() | |
| 1592 // { | |
| 1593 // static const char Base64[] = | |
| 1594 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | |
| 1595 // char *pos; | |
| 1596 // int idx, i, j; | |
| 1597 // printf(" "); | |
| 1598 // for (i = 0; i < 255; i += 8) { | |
| 1599 // for (j = i; j < i + 8; j++) { | |
| 1600 // pos = strchr(Base64, j); | |
| 1601 // if ((pos == NULL) || (j == 0)) | |
| 1602 // idx = 99; | |
| 1603 // else | |
| 1604 // idx = pos - Base64; | |
| 1605 // if (idx == 99) | |
| 1606 // printf(" %2d, ", idx); | |
| 1607 // else | |
| 1608 // printf(" %2d/*%c*/,", idx, j); | |
| 1609 // } | |
| 1610 // printf("\n "); | |
| 1611 // } | |
| 1612 // } | |
| 1613 | |
| 1614 return Base64UnescapeInternal(src, len_src, dest, len_dest, UnBase64); | |
| 1615 } | |
| 1616 | |
| 1617 int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) { | |
| 1618 ASSERT(dest, (L"")); | |
| 1619 ASSERT(src, (L"")); | |
| 1620 | |
| 1621 static const char UnBase64[] = { | |
| 1622 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1623 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1624 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1625 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1626 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1627 99, 99, 99, 99, 99, 62/*-*/, 99, 99, | |
| 1628 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, | |
| 1629 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99, | |
| 1630 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, | |
| 1631 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, | |
| 1632 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, | |
| 1633 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 63/*_*/, | |
| 1634 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, | |
| 1635 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, | |
| 1636 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, | |
| 1637 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99, | |
| 1638 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1639 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1640 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1641 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1642 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1643 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1644 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1645 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1646 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1647 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1648 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1649 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1650 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1651 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1652 99, 99, 99, 99, 99, 99, 99, 99, | |
| 1653 99, 99, 99, 99, 99, 99, 99, 99 | |
| 1654 }; | |
| 1655 // The above array was generated by the following code | |
| 1656 // #include <sys/time.h> | |
| 1657 // #include <stdlib.h> | |
| 1658 // #include <string.h> | |
| 1659 // main() | |
| 1660 // { | |
| 1661 // static const char Base64[] = | |
| 1662 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; | |
| 1663 // char *pos; | |
| 1664 // int idx, i, j; | |
| 1665 // printf(" "); | |
| 1666 // for (i = 0; i < 255; i += 8) { | |
| 1667 // for (j = i; j < i + 8; j++) { | |
| 1668 // pos = strchr(Base64, j); | |
| 1669 // if ((pos == NULL) || (j == 0)) | |
| 1670 // idx = 99; | |
| 1671 // else | |
| 1672 // idx = pos - Base64; | |
| 1673 // if (idx == 99) | |
| 1674 // printf(" %2d, ", idx); | |
| 1675 // else | |
| 1676 // printf(" %2d/*%c*/,", idx, j); | |
| 1677 // } | |
| 1678 // printf("\n "); | |
| 1679 // } | |
| 1680 // } | |
| 1681 | |
| 1682 return Base64UnescapeInternal(src, szsrc, dest, szdest, UnBase64); | |
| 1683 } | |
| 1684 | |
| 1685 bool IsHexDigit (WCHAR c) { | |
| 1686 return (((c >= L'a') && (c <= L'f')) | |
| 1687 || ((c >= L'A') && (c <= L'F')) | |
| 1688 || ((c >= L'0') && (c <= L'9'))); | |
| 1689 } | |
| 1690 | |
| 1691 int HexDigitToInt (WCHAR c) { | |
| 1692 return ((c >= L'a') ? ((c - L'a') + 10) : | |
| 1693 (c >= L'A') ? ((c - L'A') + 10) : | |
| 1694 (c - L'0')); | |
| 1695 } | |
| 1696 | |
| 1697 // ---------------------------------------------------------------------- | |
| 1698 // int QuotedPrintableUnescape() | |
| 1699 // | |
| 1700 // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for | |
| 1701 // more details, only briefly implemented. But from the web... | |
| 1702 // Quoted-printable is an encoding method defined in the MIME | |
| 1703 // standard. It is used primarily to encode 8-bit text (such as text | |
| 1704 // that includes foreign characters) into 7-bit US ASCII, creating a | |
| 1705 // document that is mostly readable by humans, even in its encoded | |
| 1706 // form. All MIME compliant applications can decode quoted-printable | |
| 1707 // text, though they may not necessarily be able to properly display the | |
| 1708 // document as it was originally intended. As quoted-printable encoding | |
| 1709 // is implemented most commonly, printable ASCII characters (values 33 | |
| 1710 // through 126, excluding 61), tabs and spaces that do not appear at the | |
| 1711 // end of lines, and end-of-line characters are not encoded. Other | |
| 1712 // characters are represented by an equal sign (=) immediately followed | |
| 1713 // by that character's hexadecimal value. Lines that are longer than 76 | |
| 1714 // characters are shortened by line breaks, with the equal sign marking | |
| 1715 // where the breaks occurred. | |
| 1716 // | |
| 1717 // Update: we really want QuotedPrintableUnescape to conform to rfc2047, | |
| 1718 // which expands the q encoding. In particular, it specifices that _'s are | |
| 1719 // to be treated as spaces. | |
| 1720 // ---------------------------------------------------------------------- | |
| 1721 int QuotedPrintableUnescape(const WCHAR *source, int slen, | |
| 1722 WCHAR *dest, int len_dest) { | |
| 1723 ASSERT(dest, (L"")); | |
| 1724 ASSERT(source, (L"")); | |
| 1725 | |
| 1726 WCHAR* d = dest; | |
| 1727 const WCHAR* p = source; | |
| 1728 | |
| 1729 while (*p != '\0' && p < source+slen && d < dest+len_dest) { | |
| 1730 switch (*p) { | |
| 1731 case '=': | |
| 1732 if (p == source+slen-1) { | |
| 1733 // End of line, no need to print the =.. | |
| 1734 return (d-dest); | |
| 1735 } | |
| 1736 // if its valid, convert to hex and insert | |
| 1737 if (p < source+slen-2 && IsHexDigit(p[1]) && IsHexDigit(p[2])) { | |
| 1738 // lint -e{734} Loss of precision | |
| 1739 *d++ = static_cast<WCHAR>( | |
| 1740 HexDigitToInt(p[1]) * 16 + HexDigitToInt(p[2])); | |
| 1741 p += 3; | |
| 1742 } else { | |
| 1743 p++; | |
| 1744 } | |
| 1745 break; | |
| 1746 case '_': // According to rfc2047, _'s are to be treated as spaces | |
| 1747 *d++ = ' '; p++; | |
| 1748 break; | |
| 1749 default: | |
| 1750 *d++ = *p++; | |
| 1751 break; | |
| 1752 } | |
| 1753 } | |
| 1754 return (d-dest); | |
| 1755 } | |
| 1756 | |
| 1757 // TODO(omaha): currently set not to use IsCharUpper because that is relatively
slow | |
| 1758 // this is used in the QUIB; consider if we need to use IsCharUpper or a replace
ment | |
| 1759 bool String_IsUpper(TCHAR c) { | |
| 1760 return (c >= 'A' && c <= 'Z'); | |
| 1761 // return (IsCharUpper (c)); | |
| 1762 } | |
| 1763 | |
| 1764 // Replacement for the CRT toupper(c) | |
| 1765 int String_ToUpper(int c) { | |
| 1766 // If it's < 128, then convert is ourself, which is far cheaper than the syste
m conversion | |
| 1767 if (c < 128) | |
| 1768 return String_ToUpperA(static_cast<char>(c)); | |
| 1769 | |
| 1770 TCHAR * p_c = reinterpret_cast<TCHAR *>(c); | |
| 1771 int conv_c = reinterpret_cast<int>(::CharUpper(p_c)); | |
| 1772 return conv_c; | |
| 1773 } | |
| 1774 | |
// Replacement for the CRT toupper(c), restricted to ASCII.
// Lower-case letters 'a'..'z' are mapped to 'A'..'Z'; every other value is
// returned unchanged.
char String_ToUpperA(char c) {
  const bool is_lower = !(c < 'a' || c > 'z');
  if (!is_lower) {
    return c;
  }
  return static_cast<char>(c - ('a' - 'A'));
}
| 1780 | |
| 1781 void String_ToLower(TCHAR* str) { | |
| 1782 ASSERT1(str); | |
| 1783 ::CharLower(str); | |
| 1784 } | |
| 1785 | |
| 1786 void String_ToUpper(TCHAR* str) { | |
| 1787 ASSERT1(str); | |
| 1788 ::CharUpper(str); | |
| 1789 } | |
| 1790 | |
| 1791 // String comparison based on length | |
| 1792 // Replacement for the CRT strncmp(i) | |
| 1793 int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool igno
re_case) { | |
| 1794 ASSERT(str2, (L"")); | |
| 1795 ASSERT(str1, (L"")); | |
| 1796 | |
| 1797 TCHAR c1, c2; | |
| 1798 | |
| 1799 if (len == 0) | |
| 1800 return 0; | |
| 1801 | |
| 1802 // compare each char | |
| 1803 // TODO(omaha): If we use a lot of case sensitive compares consider having 2 l
oops. | |
| 1804 do { | |
| 1805 c1 = *str1++; | |
| 1806 c2 = *str2++; | |
| 1807 if (ignore_case) { | |
| 1808 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun
cation | |
| 1809 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 | |
| 1810 } | |
| 1811 } while ( (--len) && c1 && (c1 == c2) ); | |
| 1812 | |
| 1813 return (int)(c1 - c2); | |
| 1814 } | |
| 1815 | |
| 1816 // TODO(omaha): Why do we introduce this behaviorial difference? | |
| 1817 // Replacement for strncpy() - except ALWAYS ends string with null | |
| 1818 TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len) { | |
| 1819 ASSERT (source, (L"")); | |
| 1820 ASSERT (destination, (L"")); | |
| 1821 | |
| 1822 TCHAR* result = destination; | |
| 1823 | |
| 1824 ASSERT (0 != len, (L"")); // Too short a destination for even the null cha
racter | |
| 1825 | |
| 1826 while (*source && len) { | |
| 1827 *destination++ = *source++; | |
| 1828 len--; | |
| 1829 } | |
| 1830 | |
| 1831 // If we ran out of space, back up one | |
| 1832 if (0 == len) { | |
| 1833 destination--; | |
| 1834 } | |
| 1835 | |
| 1836 // Null-terminate the string | |
| 1837 *destination = _T('\0'); | |
| 1838 | |
| 1839 return result; | |
| 1840 } | |
| 1841 | |
| 1842 // check if a string starts with another string | |
| 1843 bool String_StartsWith(const TCHAR *str, const TCHAR *start_str, | |
| 1844 bool ignore_case) { | |
| 1845 ASSERT(start_str, (L"")); | |
| 1846 ASSERT(str, (L"")); | |
| 1847 | |
| 1848 while (0 != *str) { | |
| 1849 // Check for matching characters | |
| 1850 TCHAR c1 = *str; | |
| 1851 TCHAR c2 = *start_str; | |
| 1852 | |
| 1853 // Reached the end of start_str? | |
| 1854 if (0 == c2) | |
| 1855 return true; | |
| 1856 | |
| 1857 if (ignore_case) { | |
| 1858 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun
cation | |
| 1859 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 Suspicious trun
cation | |
| 1860 } | |
| 1861 | |
| 1862 if (c1 != c2) | |
| 1863 return false; | |
| 1864 | |
| 1865 ++str; | |
| 1866 ++start_str; | |
| 1867 } | |
| 1868 | |
| 1869 // If str is shorter than start_str, no match. If equal size, match. | |
| 1870 return 0 == *start_str; | |
| 1871 } | |
| 1872 | |
| 1873 // check if a string starts with another string | |
| 1874 bool String_StartsWithA(const char *str, const char *start_str, bool ignore_case
) { | |
| 1875 ASSERT(start_str, (L"")); | |
| 1876 ASSERT(str, (L"")); | |
| 1877 | |
| 1878 while (0 != *str) { | |
| 1879 // Check for matching characters | |
| 1880 char c1 = *str; | |
| 1881 char c2 = *start_str; | |
| 1882 | |
| 1883 // Reached the end of start_str? | |
| 1884 if (0 == c2) | |
| 1885 return true; | |
| 1886 | |
| 1887 if (ignore_case) { | |
| 1888 c1 = String_ToLowerCharAnsi(c1); | |
| 1889 c2 = String_ToLowerCharAnsi(c2); | |
| 1890 } | |
| 1891 | |
| 1892 if (c1 != c2) | |
| 1893 return false; | |
| 1894 | |
| 1895 ++str; | |
| 1896 ++start_str; | |
| 1897 } | |
| 1898 | |
| 1899 // If str is shorter than start_str, no match. If equal size, match. | |
| 1900 return 0 == *start_str; | |
| 1901 } | |
| 1902 | |
| 1903 // the wrapper version below actually increased code size as of 5/31/04 | |
| 1904 // perhaps because the int64 version is larger and in some EXE/DLLs we only need
the int32 version | |
| 1905 | |
| 1906 // converts a string to an int | |
| 1907 // Does not check for overflow | |
| 1908 // is the direct int32 version significantly faster for our usage? | |
| 1909 // int32 String_StringToInt(const TCHAR * str) { | |
| 1910 // ASSERT(str, (L"")); | |
| 1911 // return static_cast<int32>(String_StringToInt64 (str)); | |
| 1912 // } | |
| 1913 | |
| 1914 // converts a string to an int | |
| 1915 // Does not check for overflow | |
| 1916 int32 String_StringToInt(const TCHAR * str) { | |
| 1917 ASSERT(str, (L"")); | |
| 1918 | |
| 1919 int c; // current char | |
| 1920 int32 total; // current total | |
| 1921 int sign; // if '-', then negative, otherwise positive | |
| 1922 | |
| 1923 // remove spaces | |
| 1924 while ( *str == _T(' ')) | |
| 1925 ++str; | |
| 1926 | |
| 1927 c = (int)*str++; | |
| 1928 sign = c; // save sign indication | |
| 1929 if (c == _T('-') || c == _T('+')) | |
| 1930 c = (int)*str++; // skip sign | |
| 1931 | |
| 1932 total = 0; | |
| 1933 | |
| 1934 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1 ) { | |
| 1935 total = 10 * total + c; // accumulate digit | |
| 1936 c = *str++; // get next char | |
| 1937 } | |
| 1938 | |
| 1939 if (sign == '-') | |
| 1940 return -total; | |
| 1941 else | |
| 1942 return total; // return result, negated if necessary | |
| 1943 } | |
| 1944 | |
| 1945 // converts a string to an int64 | |
| 1946 // Does not check for overflow | |
| 1947 int64 String_StringToInt64(const TCHAR * str) { | |
| 1948 ASSERT(str, (L"")); | |
| 1949 | |
| 1950 int c; // current char | |
| 1951 int64 total; // current total | |
| 1952 int sign; | |
| 1953 | |
| 1954 while (*str == ' ') ++str; // skip space | |
| 1955 | |
| 1956 c = (int)*str++; | |
| 1957 sign = c; /* save sign indication */ | |
| 1958 if (c == '-' || c == '+') | |
| 1959 c = (int)*str++; | |
| 1960 | |
| 1961 total = 0; | |
| 1962 | |
| 1963 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1) { | |
| 1964 total = 10 * total + c; /* accumulate digit */ | |
| 1965 c = *str++; /* get next char */ | |
| 1966 } | |
| 1967 | |
| 1968 if (sign == '-') | |
| 1969 return -total; | |
| 1970 else | |
| 1971 return total; | |
| 1972 } | |
| 1973 | |
| 1974 // A faster version of the ::CharLower command. We first check if all characters
are in low ANSI | |
| 1975 // If so, we can convert it ourselves [which is about 10x faster] | |
| 1976 // Otherwise, ask the system to do it for us. | |
| 1977 TCHAR * String_FastToLower(TCHAR * str) { | |
| 1978 ASSERT(str, (L"")); | |
| 1979 | |
| 1980 TCHAR * p = str; | |
| 1981 while (*p) { | |
| 1982 // If we can't process it ourselves, then do it with the API | |
| 1983 if (*p > 127) | |
| 1984 return ::CharLower(str); | |
| 1985 ++p; | |
| 1986 } | |
| 1987 | |
| 1988 // If we're still here, do it ourselves | |
| 1989 p = str; | |
| 1990 while (*p) { | |
| 1991 // Lower case it | |
| 1992 if (*p >= L'A' && *p <= 'Z') | |
| 1993 *p |= 0x20; | |
| 1994 ++p; | |
| 1995 } | |
| 1996 | |
| 1997 return str; | |
| 1998 } | |
| 1999 | |
| 2000 // Convert a size_t to a CString | |
| 2001 CString sizet_to_str(const size_t & i) { | |
| 2002 CString out; | |
| 2003 out.Format(NOTRANSL(_T("%u")),i); | |
| 2004 return out; | |
| 2005 } | |
| 2006 | |
| 2007 // Convert an int to a CString | |
| 2008 CString itostr(const int i) { | |
| 2009 return String_Int64ToString(i, 10); | |
| 2010 } | |
| 2011 | |
| 2012 // Convert a uint to a CString | |
| 2013 CString itostr(const uint32 i) { | |
| 2014 return String_Int64ToString(i, 10); | |
| 2015 } | |
| 2016 | |
| 2017 // converts an int to a string | |
| 2018 // Does not check for overflow | |
| 2019 CString String_Int64ToString(int64 value, int radix) { | |
| 2020 ASSERT(radix > 0, (L"")); | |
| 2021 | |
| 2022 // Space big enough for it in binary, plus the sign | |
| 2023 TCHAR temp[66]; | |
| 2024 | |
| 2025 bool negative = false; | |
| 2026 if (value < 0) { | |
| 2027 negative = true; | |
| 2028 value = -value; | |
| 2029 } | |
| 2030 | |
| 2031 int pos = 0; | |
| 2032 | |
| 2033 // Add digits in reverse order | |
| 2034 do { | |
| 2035 TCHAR digit = (TCHAR) (value % radix); | |
| 2036 if (digit > 9) | |
| 2037 temp[pos] = L'a' + digit - 10; | |
| 2038 else | |
| 2039 temp[pos] = L'0' + digit; | |
| 2040 | |
| 2041 pos++; | |
| 2042 value /= radix; | |
| 2043 } while (value > 0); | |
| 2044 | |
| 2045 if (negative) | |
| 2046 temp[pos++] = L'-'; | |
| 2047 | |
| 2048 // Reverse it before making a CString out of it | |
| 2049 int start = 0, end = pos - 1; | |
| 2050 while (start < end) { | |
| 2051 TCHAR t = temp[start]; | |
| 2052 temp[start] = temp[end]; | |
| 2053 temp[end] = t; | |
| 2054 | |
| 2055 end--; | |
| 2056 start++; | |
| 2057 } | |
| 2058 | |
| 2059 return CString(temp, pos); | |
| 2060 } | |
| 2061 | |
| 2062 // converts an uint64 to a string | |
| 2063 // Does not check for overflow | |
| 2064 CString String_Uint64ToString(uint64 value, int radix) { | |
| 2065 ASSERT1(radix > 0); | |
| 2066 | |
| 2067 CString ret; | |
| 2068 | |
| 2069 const uint32 kMaxUint64Digits = 65; | |
| 2070 | |
| 2071 // Space big enough for it in binary | |
| 2072 TCHAR* temp = ret.GetBufferSetLength(kMaxUint64Digits); | |
| 2073 | |
| 2074 int pos = 0; | |
| 2075 | |
| 2076 // Add digits in reverse order | |
| 2077 do { | |
| 2078 TCHAR digit = static_cast<TCHAR>(value % radix); | |
| 2079 if (digit > 9) { | |
| 2080 temp[pos] = _T('a') + digit - 10; | |
| 2081 } else { | |
| 2082 temp[pos] = _T('0') + digit; | |
| 2083 } | |
| 2084 | |
| 2085 pos++; | |
| 2086 value /= radix; | |
| 2087 } while (value > 0 && pos < kMaxUint64Digits); | |
| 2088 | |
| 2089 ret.ReleaseBuffer(pos); | |
| 2090 | |
| 2091 // Reverse it before making a CString out of it | |
| 2092 ret.MakeReverse(); | |
| 2093 | |
| 2094 return ret; | |
| 2095 } | |
| 2096 | |
| 2097 // converts an double to a string specifies the number of digits after | |
| 2098 // the decimal point | |
| 2099 CString String_DoubleToString(double value, int point_digits) { | |
| 2100 int64 int_val = (int64) value; | |
| 2101 | |
| 2102 // Deal with integer part | |
| 2103 CString result(String_Int64ToString(int_val, 10)); | |
| 2104 | |
| 2105 if (point_digits > 0) { | |
| 2106 result.AppendChar(L'.'); | |
| 2107 | |
| 2108 // get the fp digits | |
| 2109 double rem_val = value - int_val; | |
| 2110 if (rem_val < 0) | |
| 2111 rem_val = -rem_val; | |
| 2112 | |
| 2113 // multiply w/ the requested number of significant digits | |
| 2114 // construct the string in place | |
| 2115 for(int i=0; i<point_digits; i++) { | |
| 2116 // TODO(omaha): I have seen 1.2 turn into 1.1999999999999, and generate th
at string. | |
| 2117 // We should round better. For now, I'll add a quick fix to favor high | |
| 2118 rem_val += 1e-12; | |
| 2119 rem_val *= 10; | |
| 2120 // Get the ones digit | |
| 2121 int64 int_rem_dig = std::min(10LL, static_cast<int64>(rem_val)); | |
| 2122 result += static_cast<TCHAR>(int_rem_dig + L'0'); | |
| 2123 rem_val = rem_val - int_rem_dig; | |
| 2124 } | |
| 2125 } | |
| 2126 | |
| 2127 return result; | |
| 2128 } | |
| 2129 | |
| 2130 double String_StringToDouble (const TCHAR *s) { | |
| 2131 ASSERT(s, (L"")); | |
| 2132 | |
| 2133 double value, power; | |
| 2134 int i = 0, sign; | |
| 2135 | |
| 2136 while (IsSpaceW(s[i])) i++; | |
| 2137 | |
| 2138 // get sign | |
| 2139 sign = (s[i] == '-') ? -1 : 1; | |
| 2140 if (s[i] == '+' || s[i] == '-') i++; | |
| 2141 | |
| 2142 for (value = 0.0; s[i] >= '0' && s[i] <= '9'; i++) | |
| 2143 value = 10.0 * value + (s[i] - '0'); | |
| 2144 | |
| 2145 if (s[i] == '.') i++; | |
| 2146 | |
| 2147 for (power = 1.0; s[i] >= '0' && s[i] <= '9'; i++) { | |
| 2148 value = 10.0 * value + (s[i] - '0'); | |
| 2149 power *= 10.0; | |
| 2150 } | |
| 2151 | |
| 2152 return sign * value / power; | |
| 2153 } | |
| 2154 | |
| 2155 // Converts a character to a digit | |
| 2156 // if the character is not a digit return -1 (same as CRT) | |
| 2157 int32 String_CharToDigit(const TCHAR c) { | |
| 2158 return ((c) >= '0' && (c) <= '9' ? (c) - '0' : -1); | |
| 2159 } | |
| 2160 | |
| 2161 bool String_IsDigit (const TCHAR c) { | |
| 2162 return ((c) >= '0' && (c) <= '9'); | |
| 2163 } | |
| 2164 | |
| 2165 TCHAR String_DigitToChar(unsigned int n) { | |
| 2166 ASSERT1(n < 10); | |
| 2167 return static_cast<TCHAR>(_T('0') + n % 10); | |
| 2168 } | |
| 2169 | |
| 2170 // Returns true if an identifier character: letter, digit, or "_" | |
| 2171 bool String_IsIdentifierChar(const TCHAR c) { | |
| 2172 return ((c >= _T('A') && c <= _T('Z')) || | |
| 2173 (c >= _T('a') && c <= _T('z')) || | |
| 2174 (c >= _T('0') && c <= _T('9')) || | |
| 2175 c == _T('_')); | |
| 2176 } | |
| 2177 | |
| 2178 // Returns true if the string has letters in it. | |
| 2179 // This is used by the keyword extractor to downweight numbers, | |
| 2180 // IDs (sequences of numbers like social security numbers), etc. | |
| 2181 bool String_HasAlphabetLetters (const TCHAR * str) { | |
| 2182 ASSERT (str, (L"")); | |
| 2183 | |
| 2184 while (*str != '\0') { | |
| 2185 // if (iswalpha (*str)) { | |
| 2186 // Note that IsCharAlpha is slower but we want to avoid the CRT | |
| 2187 if (IsCharAlpha (*str)) { | |
| 2188 return true; | |
| 2189 } | |
| 2190 ++str; | |
| 2191 } | |
| 2192 | |
| 2193 return false; | |
| 2194 } | |
| 2195 | |
| 2196 CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* pow
er) { | |
| 2197 uint32 to_one_decimal; | |
| 2198 | |
| 2199 uint32 gig = base_ten ? 1000000000 : (1<<30); | |
| 2200 uint32 gig_div_10 = base_ten ? 100000000 : (1<<30)/10; | |
| 2201 uint32 meg = base_ten ? 1000000 : (1<<20); | |
| 2202 uint32 meg_div_10 = base_ten ? 100000 : (1<<20)/10; | |
| 2203 uint32 kilo = base_ten ? 1000 : (1<<10); | |
| 2204 uint32 kilo_div_10 = base_ten ? 100 : (1<<10)/10; | |
| 2205 | |
| 2206 if (value >= gig) { | |
| 2207 if (power) *power = 3; | |
| 2208 to_one_decimal = static_cast<uint32>(value / gig_div_10); | |
| 2209 } else if (value >= meg) { | |
| 2210 if (power) *power = 2; | |
| 2211 to_one_decimal = static_cast<uint32>(value / meg_div_10); | |
| 2212 } else if (value >= kilo) { | |
| 2213 if (power) *power = 1; | |
| 2214 to_one_decimal = static_cast<uint32>(value / kilo_div_10); | |
| 2215 } else { | |
| 2216 if (power) *power = 0; | |
| 2217 return String_Int64ToString(static_cast<uint32>(value), 10 /*radix*/); | |
| 2218 } | |
| 2219 | |
| 2220 uint32 whole_part = to_one_decimal / 10; | |
| 2221 | |
| 2222 if (whole_part < 10) | |
| 2223 return Show(0.1 * static_cast<double>(to_one_decimal), 1); | |
| 2224 | |
| 2225 return String_Int64ToString(whole_part, 10 /*radix*/); | |
| 2226 } | |
| 2227 | |
| 2228 int String_FindString(const TCHAR *s1, const TCHAR *s2) { | |
| 2229 ASSERT(s2, (L"")); | |
| 2230 ASSERT(s1, (L"")); | |
| 2231 | |
| 2232 // Naive implementation, but still oodles better than ATL's implementation | |
| 2233 // (which deals with variable character widths---we don't). | |
| 2234 | |
| 2235 const TCHAR *found = _tcsstr(s1, s2); | |
| 2236 if (NULL == found) | |
| 2237 return -1; | |
| 2238 | |
| 2239 return found - s1; | |
| 2240 } | |
| 2241 | |
| 2242 int String_FindString(const TCHAR *s1, const TCHAR *s2, int start_pos) { | |
| 2243 ASSERT(s2, (L"")); | |
| 2244 ASSERT(s1, (L"")); | |
| 2245 | |
| 2246 // Naive implementation, but still oodles better than ATL's implementation | |
| 2247 // (which deals with variable character widths---we don't). | |
| 2248 | |
| 2249 int skip = start_pos; | |
| 2250 | |
| 2251 const TCHAR *s = s1; | |
| 2252 while (skip && *s) { | |
| 2253 ++s; | |
| 2254 --skip; | |
| 2255 } | |
| 2256 if (!(*s)) | |
| 2257 return -1; | |
| 2258 | |
| 2259 const TCHAR *found = _tcsstr(s, s2); | |
| 2260 if (NULL == found) | |
| 2261 return -1; | |
| 2262 | |
| 2263 return found - s1; | |
| 2264 } | |
| 2265 | |
| 2266 int String_FindChar(const TCHAR *str, const TCHAR c) { | |
| 2267 ASSERT (str, (L"")); | |
| 2268 const TCHAR *s = str; | |
| 2269 while (*s) { | |
| 2270 if (*s == c) | |
| 2271 return s - str; | |
| 2272 ++s; | |
| 2273 } | |
| 2274 | |
| 2275 return -1; | |
| 2276 } | |
| 2277 | |
| 2278 // taken from wcsrchr, modified to behave in the CString way | |
| 2279 int String_ReverseFindChar(const TCHAR * str,TCHAR c) { | |
| 2280 ASSERT (str, (L"")); | |
| 2281 TCHAR *start = (TCHAR *)str; | |
| 2282 | |
| 2283 while (*str++) /* find end of string */ | |
| 2284 ; | |
| 2285 /* search towards front */ | |
| 2286 while (--str != start && *str != (TCHAR)c) | |
| 2287 ; | |
| 2288 | |
| 2289 if (*str == (TCHAR)c) /* found ? */ | |
| 2290 return( str - start ); | |
| 2291 | |
| 2292 return -1; | |
| 2293 } | |
| 2294 | |
| 2295 int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos) { | |
| 2296 ASSERT (str, (L"")); | |
| 2297 int n = 0; | |
| 2298 const TCHAR *s = str; | |
| 2299 while (*s) { | |
| 2300 if (n++ >= start_pos && *s == c) | |
| 2301 return s - str; | |
| 2302 ++s; | |
| 2303 } | |
| 2304 | |
| 2305 return -1; | |
| 2306 } | |
| 2307 | |
| 2308 bool String_Contains(const TCHAR *s1, const TCHAR *s2) { | |
| 2309 ASSERT(s2, (L"")); | |
| 2310 ASSERT(s1, (L"")); | |
| 2311 | |
| 2312 return -1 != String_FindString(s1, s2); | |
| 2313 } | |
| 2314 | |
| 2315 void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char) { | |
| 2316 ASSERT (str, (L"")); | |
| 2317 while (*str) { | |
| 2318 if (*str == old_char) | |
| 2319 *str = new_char; | |
| 2320 | |
| 2321 ++str; | |
| 2322 } | |
| 2323 } | |
| 2324 | |
| 2325 void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char) { | |
| 2326 String_ReplaceChar (str.GetBuffer(), old_char, new_char); | |
| 2327 str.ReleaseBuffer(); | |
| 2328 } | |
| 2329 | |
| 2330 int ReplaceCString (CString & src, const TCHAR *from, const TCHAR *to) { | |
| 2331 ASSERT(to, (L"")); | |
| 2332 ASSERT(from, (L"")); | |
| 2333 | |
| 2334 return ReplaceCString(src, from, lstrlen(from), to, lstrlen(to), kRepMax); | |
| 2335 } | |
| 2336 | |
| 2337 // A special version of the replace function which takes advantage of CString pr
operties | |
| 2338 // to make it much faster when the string grows | |
| 2339 // 1) It will resize the string in place if possible. Even if it has to 'grow' t
he string | |
| 2340 // 2) It will cutoff after a maximum number of matches | |
| 2341 // 3) It expects sizing data to be passed to it | |
| 2342 int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len, | |
| 2343 const TCHAR *to, unsigned int to_len, | |
| 2344 unsigned int max_matches) { | |
| 2345 ASSERT (from, (L"")); | |
| 2346 ASSERT (to, (L"")); | |
| 2347 ASSERT (from[0] != '\0', (L"")); | |
| 2348 int i = 0, j = 0; | |
| 2349 unsigned int matches = 0; | |
| 2350 | |
| 2351 // Keep track of the matches, it's easier than recalculating them | |
| 2352 unsigned int match_pos_stack[kExpectedMaxReplaceMatches]; | |
| 2353 | |
| 2354 // We might need to dynamically allocate space for the matches | |
| 2355 bool dynamic_allocate = false; | |
| 2356 unsigned int * match_pos = (unsigned int*)match_pos_stack; | |
| 2357 unsigned int max_match_size = kExpectedMaxReplaceMatches; | |
| 2358 | |
| 2359 // Is the string getting bigger? | |
| 2360 bool longer = to_len > from_len; | |
| 2361 | |
| 2362 // don't compute the lengths unless we know we need to | |
| 2363 int src_len = src.GetLength(); | |
| 2364 int cur_len = src_len; | |
| 2365 | |
| 2366 // Trick: We temporarily add 1 extra character to the string. The first char f
rom the from | |
| 2367 // string. This way we can avoid searching for NULL, since we are guaranteed t
o find it | |
| 2368 TCHAR * buffer = src.GetBufferSetLength(src_len+1); | |
| 2369 const TCHAR from_0 = from[0]; | |
| 2370 buffer[src_len] = from[0]; | |
| 2371 | |
| 2372 while (i < cur_len) { | |
| 2373 // If we have too many matches, then re-allocate to a dynamic buffer that is | |
| 2374 // twice as big as the one we are currently using | |
| 2375 if (longer && (matches == max_match_size)) { | |
| 2376 // Double the buffer size, and copy it over | |
| 2377 unsigned int * temp = new unsigned int[max_match_size * 2]; | |
| 2378 memcpy(temp, match_pos, matches * sizeof(unsigned int)); | |
| 2379 if (dynamic_allocate) | |
| 2380 delete [] match_pos; // lint !e424 Inappropriate deallocation | |
| 2381 match_pos = temp; | |
| 2382 | |
| 2383 max_match_size *= 2; | |
| 2384 dynamic_allocate = true; | |
| 2385 } | |
| 2386 | |
| 2387 // If we have the maximum number of matches already, then stop | |
| 2388 if (matches >= max_matches) { | |
| 2389 break; | |
| 2390 } | |
| 2391 | |
| 2392 // For each potential match | |
| 2393 // Note: oddly enough, this is the most expensive line in the function under
normal usage. So I am optimizing the heck out of it | |
| 2394 TCHAR * buf_ptr = buffer + i; | |
| 2395 while (*buf_ptr != from_0) { ++buf_ptr; } | |
| 2396 i = buf_ptr - buffer; | |
| 2397 | |
| 2398 // We're done! | |
| 2399 if (i >= cur_len) | |
| 2400 break; | |
| 2401 | |
| 2402 // buffer is not NULL terminated, we replaced the NULL above | |
| 2403 while (i < cur_len && buffer[i] && buffer[i] == from[j]) { | |
| 2404 ++i; ++j; | |
| 2405 if (from[j] == '\0') { // found match | |
| 2406 | |
| 2407 if (!longer) { // modify in place | |
| 2408 | |
| 2409 memcpy ((byte *)(buffer+i) - (sizeof (TCHAR) * from_len), (byte *)to,
sizeof (TCHAR) * to_len); | |
| 2410 // if there are often a lot of replacements, it would be faster to cre
ate a new string instead | |
| 2411 // of using memmove | |
| 2412 | |
| 2413 // TODO(omaha): - memmove will cause n^2 behavior in strings with mult
iple matches since it will be moved many times... | |
| 2414 if (to_len < from_len) { memmove ((byte *)(buffer+i) - (sizeof (TCHAR)
* (from_len - to_len)), | |
| 2415 (byte *)(buffer+i), (src_len - i + 1)
* sizeof (TCHAR)); } | |
| 2416 | |
| 2417 i -= (from_len - to_len); | |
| 2418 cur_len -= (from_len - to_len); | |
| 2419 } | |
| 2420 else | |
| 2421 match_pos[matches] = i - from_len; | |
| 2422 | |
| 2423 ++matches; | |
| 2424 | |
| 2425 break; | |
| 2426 } | |
| 2427 } | |
| 2428 | |
| 2429 j = 0; | |
| 2430 } | |
| 2431 | |
| 2432 if (to_len <= from_len) | |
| 2433 src_len -= matches * (from_len - to_len); | |
| 2434 | |
| 2435 // if the new string is longer we do another pass now that we know how long th
e new string needs to be | |
| 2436 if (matches && to_len > from_len) { | |
| 2437 src.ReleaseBuffer(src_len); | |
| 2438 | |
| 2439 int new_len = src_len + matches * (to_len - from_len); | |
| 2440 buffer = src.GetBufferSetLength(new_len); | |
| 2441 | |
| 2442 // It's easier to assemble it backwards... | |
| 2443 int temp_end = new_len; | |
| 2444 for(i = matches-1; i >= 0; --i) { | |
| 2445 // Figure out where the trailing portion isthe trailing portion | |
| 2446 int len = src_len - match_pos[i] - from_len; | |
| 2447 int start = match_pos[i] + from_len; | |
| 2448 int dest = temp_end - len; | |
| 2449 memmove(buffer+dest, buffer+start, (len) * sizeof(TCHAR)); | |
| 2450 | |
| 2451 // copy the new item | |
| 2452 memcpy(buffer + dest - to_len, to, to_len * sizeof(TCHAR)); | |
| 2453 | |
| 2454 // Update the pointers | |
| 2455 temp_end = dest - to_len; | |
| 2456 src_len = match_pos[i]; | |
| 2457 | |
| 2458 } | |
| 2459 src_len = new_len; | |
| 2460 } | |
| 2461 | |
| 2462 src.ReleaseBuffer(src_len); | |
| 2463 if (dynamic_allocate) | |
| 2464 delete [] match_pos; // lint !e673 Possibly inappropriate deallocation | |
| 2465 | |
| 2466 return matches; | |
| 2467 } | |
| 2468 | |
| 2469 /* | |
| 2470 The following 2 functions do replacement on TCHAR* directly. They are currently unused. | |
| 2471 Feel free to put them back into use if you need to. | |
| 2472 */ | |
| 2473 int ReplaceString (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out,
int *out_len) { | |
| 2474 ASSERT(out_len, (L"")); | |
| 2475 ASSERT(out, (L"")); | |
| 2476 ASSERT(to, (L"")); | |
| 2477 ASSERT(from, (L"")); | |
| 2478 ASSERT(src, (L"")); | |
| 2479 | |
| 2480 bool created_new_string; | |
| 2481 int matches = ReplaceStringMaybeInPlace (src, from, to, out, out_len, &created
_new_string); | |
| 2482 if (!created_new_string) { | |
| 2483 *out = new TCHAR [(*out_len)+1]; | |
| 2484 if (!(*out)) { *out = src; return 0; } | |
| 2485 _tcscpy_s(*out, *out_len + 1, src); | |
| 2486 } | |
| 2487 | |
| 2488 return matches; | |
| 2489 } | |
| 2490 | |
| 2491 int ReplaceStringMaybeInPlace (TCHAR *src, const TCHAR *from, const TCHAR *to, T
CHAR **out, int *out_len, bool *created_new_string) { | |
| 2492 ASSERT (created_new_string, (L"")); | |
| 2493 ASSERT (out_len, (L"")); | |
| 2494 ASSERT (src, (L"")); | |
| 2495 ASSERT (from, (L"")); | |
| 2496 ASSERT (to, (L"")); | |
| 2497 ASSERT (out, (L"")); | |
| 2498 ASSERT (from[0] != '\0', (L"")); | |
| 2499 int i = 0, j = 0; | |
| 2500 int matches = 0; | |
| 2501 | |
| 2502 // don't compute the lengths unless we know we need to | |
| 2503 int from_len = -1, to_len = -1, src_len = -1; | |
| 2504 | |
| 2505 *created_new_string = false; | |
| 2506 *out = src; | |
| 2507 | |
| 2508 while (src[i]) { | |
| 2509 while (src[i] && src[i] != from[0]) { i++; } | |
| 2510 while (src[i] && src[i] == from[j]) { | |
| 2511 i++; j++; | |
| 2512 if (from[j] == '\0') { // found match | |
| 2513 if (from_len == -1) { // compute lengths if not known | |
| 2514 from_len = lstrlen (from); | |
| 2515 to_len = lstrlen (to); | |
| 2516 src_len = lstrlen (src); | |
| 2517 } | |
| 2518 | |
| 2519 matches++; | |
| 2520 | |
| 2521 if (to_len <= from_len) { // modify in place | |
| 2522 memcpy ((byte *)(src+i) - (sizeof (TCHAR) * from_len), (byte *)to, siz
eof (TCHAR) * to_len); | |
| 2523 // if there are often a lot of replacements, it would be faster to cre
ate a new string instead | |
| 2524 // of using memmove | |
| 2525 if (to_len < from_len) { memmove ((byte *)(src+i) - (sizeof (TCHAR) *
(from_len - to_len)), | |
| 2526 (byte *)(src+i), (src_len - i + 1) *
sizeof (TCHAR)); } | |
| 2527 i -= (from_len - to_len); | |
| 2528 } | |
| 2529 | |
| 2530 break; | |
| 2531 } | |
| 2532 } | |
| 2533 | |
| 2534 j = 0; | |
| 2535 } | |
| 2536 | |
| 2537 *out_len = i; | |
| 2538 | |
| 2539 // if the new string is longer we do another pass now that we know how long th
e new string needs to be | |
| 2540 if (matches && to_len > from_len) { | |
| 2541 ASSERT (src_len == i, (L"")); | |
| 2542 int new_len = src_len + matches * (to_len - from_len); | |
| 2543 *out = new TCHAR [new_len+1]; | |
| 2544 if (!(*out)) { *out = src; *out_len = lstrlen (src); return 0; } | |
| 2545 *created_new_string = true; | |
| 2546 i = 0; j = 0; int k = 0; | |
| 2547 | |
| 2548 while (src[i]) { | |
| 2549 while (src[i] && src[i] != from[0]) { | |
| 2550 (*out)[k++] = src[i++]; | |
| 2551 } | |
| 2552 while (src[i] && src[i] == from[j]) { | |
| 2553 (*out)[k++] = src[i++]; | |
| 2554 j++; | |
| 2555 | |
| 2556 if (from[j] == '\0') { // found match | |
| 2557 k -= from_len; | |
| 2558 ASSERT (k >= 0, (L"")); | |
| 2559 memcpy ((byte *)((*out)+k), (byte *)to, sizeof (TCHAR) * to_le
n); | |
| 2560 k += to_len; | |
| 2561 break; | |
| 2562 } | |
| 2563 } | |
| 2564 | |
| 2565 j = 0; | |
| 2566 } | |
| 2567 | |
| 2568 (*out)[k] = '\0'; | |
| 2569 ASSERT (k == new_len, (L"")); | |
| 2570 *out_len = new_len; | |
| 2571 } | |
| 2572 | |
| 2573 return matches; | |
| 2574 } | |
| 2575 | |
| 2576 /**************************************************************************** | |
| 2577 * wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed in
t. | |
| 2578 * | |
| 2579 * modified from: | |
| 2580 * | |
| 2581 * wcstol.c - Contains C runtimes wcstol and wcstoul | |
| 2582 * | |
| 2583 * Copyright (c) Microsoft Corporation. All rights reserved. | |
| 2584 * | |
| 2585 * Purpose: | |
| 2586 * Convert an ascii string to a long 32-bit value. The base | |
| 2587 * used for the caculations is supplied by the caller. The base | |
| 2588 * must be in the range 0, 2-36. If a base of 0 is supplied, the | |
| 2589 * ascii string must be examined to determine the base of the | |
| 2590 * number: | |
| 2591 * (a) First char = '0', second char = 'x' or 'X', | |
| 2592 * use base 16. | |
| 2593 * (b) First char = '0', use base 8 | |
| 2594 * (c) First char in range '1' - '9', use base 10. | |
| 2595 * | |
| 2596 * If the 'endptr' value is non-NULL, then wcstol/wcstoul places | |
| 2597 * a pointer to the terminating character in this value. | |
| 2598 * See ANSI standard for details | |
| 2599 * | |
| 2600 *Entry: | |
| 2601 * nptr == NEAR/FAR pointer to the start of string. | |
| 2602 * endptr == NEAR/FAR pointer to the end of the string. | |
| 2603 * ibase == integer base to use for the calculations. | |
| 2604 * | |
| 2605 * string format: [whitespace] [sign] [0] [x] [digits/letters] | |
| 2606 * | |
| 2607 *Exit: | |
| 2608 * Good return: | |
| 2609 * result | |
| 2610 * | |
| 2611 * Overflow return: | |
| 2612 * wcstol -- LONG_MAX or LONG_MIN | |
| 2613 * wcstoul -- ULONG_MAX | |
| 2614 * wcstol/wcstoul -- errno == ERANGE | |
| 2615 * | |
| 2616 * No digits or bad base return: | |
| 2617 * 0 | |
| 2618 * endptr = nptr* | |
| 2619 * | |
| 2620 *Exceptions: | |
| 2621 * None. | |
| 2622 * | |
| 2623 *******************************************************************************/ | |
| 2624 | |
// Flag values used by wcstoxl.
#define kFlUnsigned (1)   // wcstoul called
#define kFlNeg (2)        // negative sign found
#define kFlOverflow (4)   // overflow occurred
#define kFlReaddigit (8)  // we've read at least one correct digit
| 2630 | |
| 2631 static unsigned long __cdecl wcstoxl (const wchar_t *nptr, wchar_t **endptr, int
ibase, int flags) { | |
| 2632 ASSERT(nptr, (L"")); | |
| 2633 | |
| 2634 const wchar_t *p; | |
| 2635 wchar_t c; | |
| 2636 unsigned long number; | |
| 2637 unsigned digval; | |
| 2638 unsigned long maxval; | |
| 2639 // #ifdef _MT | |
| 2640 // pthreadlocinfo ptloci = _getptd()->ptlocinfo; | |
| 2641 | |
| 2642 // if ( ptloci != __ptlocinfo ) | |
| 2643 // ptloci = __updatetlocinfo(); | |
| 2644 // #endif // _MT */ | |
| 2645 | |
| 2646 p = nptr; // p is our scanning pointer */ | |
| 2647 number = 0; // start with zero */ | |
| 2648 | |
| 2649 c = *p++; // read char */ | |
| 2650 | |
| 2651 // #ifdef _MT | |
| 2652 // while ( __iswspace_mt(ptloci, c) ) | |
| 2653 // #else // _MT */ | |
| 2654 while (c == ' ') | |
| 2655 // while ( iswspace(c) ) | |
| 2656 // #endif // _MT */ | |
| 2657 c = *p++; // skip whitespace */ | |
| 2658 | |
| 2659 if (c == '-') { | |
| 2660 flags |= kFlNeg; // remember minus sign */ | |
| 2661 c = *p++; | |
| 2662 } | |
| 2663 else if (c == '+') | |
| 2664 c = *p++; // skip sign */ | |
| 2665 | |
| 2666 if (ibase < 0 || ibase == 1 || ibase > 36) { | |
| 2667 // bad base! */ | |
| 2668 if (endptr) | |
| 2669 // store beginning of string in endptr */ | |
| 2670 *endptr = const_cast<wchar_t *>(nptr); | |
| 2671 return 0L; // return 0 */ | |
| 2672 } | |
| 2673 else if (ibase == 0) { | |
| 2674 // determine base free-lance, based on first two chars of | |
| 2675 // string */ | |
| 2676 if (String_CharToDigit(c) != 0) | |
| 2677 ibase = 10; | |
| 2678 else if (*p == L'x' || *p == L'X') | |
| 2679 ibase = 16; | |
| 2680 else | |
| 2681 ibase = 8; | |
| 2682 } | |
| 2683 | |
| 2684 if (ibase == 16) { | |
| 2685 // we might have 0x in front of number; remove if there */ | |
| 2686 if (String_CharToDigit(c) == 0 && (*p == L'x' || *p == L'X')) { | |
| 2687 ++p; | |
| 2688 c = *p++; // advance past prefix */ | |
| 2689 } | |
| 2690 } | |
| 2691 | |
| 2692 // if our number exceeds this, we will overflow on multiply */ | |
| 2693 maxval = ULONG_MAX / ibase; | |
| 2694 | |
| 2695 for (;;) { // exit in middle of loop */ | |
| 2696 | |
| 2697 // convert c to value */ | |
| 2698 if ( (digval = String_CharToDigit(c)) != (unsigned) -1 ) | |
| 2699 ; | |
| 2700 else if (c >= 'A' && c <= 'F') { digval = c - 'A' + 10; } | |
| 2701 else if (c >= 'a' && c <= 'f') { digval = c - 'a' + 10; } | |
| 2702 // else if ( __ascii_iswalpha(c)) | |
| 2703 // digval = __ascii_towupper(c) - L'A' + 10; | |
| 2704 else | |
| 2705 break; | |
| 2706 | |
| 2707 if (digval >= (unsigned)ibase) | |
| 2708 break; // exit loop if bad digit found */ | |
| 2709 | |
| 2710 // record the fact we have read one digit */ | |
| 2711 flags |= kFlReaddigit; | |
| 2712 | |
| 2713 // we now need to compute number = number * base + digval, | |
| 2714 // but we need to know if overflow occured. This requires | |
| 2715 // a tricky pre-check. */ | |
| 2716 | |
| 2717 if (number < maxval || (number == maxval && | |
| 2718 (unsigned long)digval <= ULONG_MAX % ibase)) { | |
| 2719 // we won't overflow, go ahead and multiply */ | |
| 2720 number = number * ibase + digval; | |
| 2721 } | |
| 2722 else { | |
| 2723 // we would have overflowed -- set the overflow flag */ | |
| 2724 flags |= kFlOverflow; | |
| 2725 } | |
| 2726 | |
| 2727 c = *p++; // read next digit */ | |
| 2728 } | |
| 2729 | |
| 2730 --p; // point to place that stopped scan */ | |
| 2731 | |
| 2732 if (!(flags & kFlReaddigit)) { | |
| 2733 // no number there; return 0 and point to beginning of string */ | |
| 2734 if (endptr) | |
| 2735 // store beginning of string in endptr later on */ | |
| 2736 p = nptr; | |
| 2737 number = 0L; // return 0 */ | |
| 2738 } | |
| 2739 // lint -save -e648 -e650 Overflow in -LONG_MIN | |
| 2740 #pragma warning(push) | |
| 2741 // C4287 : unsigned/negative constant mismatch. | |
| 2742 // The offending expression is number > -LONG_MIN. -LONG_MIN overflows and | |
| 2743 // technically -LONG_MIN == LONG_MIN == 0x80000000. It should actually | |
| 2744 // result in a compiler warning, such as C4307: integral constant overflow. | |
| 2745 // Anyway, in the expression (number > -LONG_MIN) the right operand is converted | |
| 2746 // to unsigned long, so the expression is actually evaluated as | |
| 2747 // number > 0x80000000UL. The code is probably correct but subtle, to say the | |
| 2748 // least. | |
| 2749 #pragma warning(disable : 4287) | |
| 2750 else if ( (flags & kFlOverflow) || | |
| 2751 ( !(flags & kFlUnsigned) && | |
| 2752 ( ( (flags & kFlNeg) && (number > -LONG_MIN) ) || | |
| 2753 ( !(flags & kFlNeg) && (number > LONG_MAX) ) ) ) ) | |
| 2754 { | |
| 2755 // overflow or signed overflow occurred */ | |
| 2756 // errno = ERANGE; | |
| 2757 if ( flags & kFlUnsigned ) | |
| 2758 number = ULONG_MAX; | |
| 2759 else if ( flags & kFlNeg ) | |
| 2760 // lint -e{648, 650} Overflow in -LONG_MIN | |
| 2761 number = (unsigned long)(-LONG_MIN); | |
| 2762 else | |
| 2763 number = LONG_MAX; | |
| 2764 } | |
| 2765 #pragma warning(pop) | |
| 2766 // lint -restore | |
| 2767 | |
| 2768 if (endptr != NULL) | |
| 2769 // store pointer to char that stopped the scan */ | |
| 2770 *endptr = const_cast<wchar_t *>(p); | |
| 2771 | |
| 2772 if (flags & kFlNeg) | |
| 2773 // negate result if there was a neg sign */ | |
| 2774 number = (unsigned long)(-(long)number); | |
| 2775 | |
| 2776 return number; // done. */ | |
| 2777 } | |
| 2778 | |
| 2779 long __cdecl Wcstol (const wchar_t *nptr, wchar_t **endptr, int ibase) { | |
| 2780 ASSERT(endptr, (L"")); | |
| 2781 ASSERT(nptr, (L"")); | |
| 2782 | |
| 2783 return (long) wcstoxl(nptr, endptr, ibase, 0); | |
| 2784 } | |
| 2785 | |
| 2786 unsigned long __cdecl Wcstoul (const wchar_t *nptr, wchar_t **endptr, int ibase)
{ | |
| 2787 // endptr may be NULL | |
| 2788 ASSERT(nptr, (L"")); | |
| 2789 | |
| 2790 return wcstoxl(nptr, endptr, ibase, kFlUnsigned); | |
| 2791 } | |
| 2792 | |
| 2793 // Functions on arrays of strings | |
| 2794 | |
| 2795 // Returns true iff s is in the array strings (case-insensitive compare) | |
| 2796 bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s) { | |
| 2797 ASSERT(s, (L"")); | |
| 2798 // strings may be NULL | |
| 2799 | |
| 2800 const int s_length = lstrlen(s); | |
| 2801 if (strings == NULL) | |
| 2802 return false; | |
| 2803 for (; *strings != NULL; strings++) { | |
| 2804 if (0 == String_StrNCmp(*strings, s, s_length, true)) { | |
| 2805 return true; // Found equal string | |
| 2806 } | |
| 2807 } | |
| 2808 return false; | |
| 2809 } | |
| 2810 | |
| 2811 // Returns index of s in the array of strings (or -1 for missing) (case-insensit
ive compare) | |
| 2812 int String_IndexOf(const TCHAR* const* strings, const TCHAR* s) { | |
| 2813 ASSERT(s, (L"")); | |
| 2814 // strings may be NULL | |
| 2815 | |
| 2816 const int s_length = lstrlen(s); | |
| 2817 if (strings == NULL) | |
| 2818 return -1; | |
| 2819 for (int i = 0; *strings != NULL; i++, strings++) { | |
| 2820 if (0 == String_StrNCmp(*strings, s, s_length, true)) { | |
| 2821 return i; // Found equal string | |
| 2822 } | |
| 2823 } | |
| 2824 return -1; | |
| 2825 } | |
| 2826 | |
| 2827 // The internal format is a int64. | |
| 2828 time64 StringToTime(const CString & time) { | |
| 2829 return static_cast<time64>(String_StringToInt64(time)); | |
| 2830 } | |
| 2831 | |
| 2832 // See above comment from StringToTime. | |
| 2833 // Just show it as a INT64 for now | |
| 2834 // NOTE: this will truncating it to INT64, which may lop off some times in the f
uture | |
| 2835 CString TimeToString(const time64 & time) { | |
| 2836 return String_Int64ToString(static_cast<int64>(time), 10); | |
| 2837 } | |
| 2838 | |
| 2839 const TCHAR *FindStringASpaceStringB (const TCHAR *s, const TCHAR *a, const TCHA
R *b) { | |
| 2840 ASSERT(s, (L"")); | |
| 2841 ASSERT(a, (L"")); | |
| 2842 ASSERT(b, (L"")); | |
| 2843 | |
| 2844 const TCHAR *search_from = s; | |
| 2845 const TCHAR *pos; | |
| 2846 while (*search_from && (pos = stristrW (search_from, a)) != NULL) { | |
| 2847 const TCHAR *start = pos; | |
| 2848 pos += lstrlen(a); | |
| 2849 search_from = pos; | |
| 2850 while (*pos == ' ' || *pos == '\t') pos++; | |
| 2851 if (!String_StrNCmp (pos, b, lstrlen(b), true)) return start; | |
| 2852 } | |
| 2853 | |
| 2854 return 0; | |
| 2855 } | |
| 2856 | |
// Returns true if |c| is an ASCII letter ('a'-'z' or 'A'-'Z').
bool IsAlphaA (const char c) {
  const bool is_lower = ('a' <= c) && (c <= 'z');
  const bool is_upper = ('A' <= c) && (c <= 'Z');
  return is_lower || is_upper;
}
| 2860 | |
// Returns true if |c| is an ASCII decimal digit ('0'-'9').
bool IsDigitA (const char c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
| 2864 | |
| 2865 void SafeStrCat (TCHAR *dest, const TCHAR *src, int dest_buffer_len) { | |
| 2866 _tcscat_s(dest, dest_buffer_len, src); | |
| 2867 } | |
| 2868 | |
| 2869 // extracts next float in a string | |
| 2870 // skips any non-digit characters | |
| 2871 // return position after end of float | |
| 2872 const TCHAR *ExtractNextDouble (const TCHAR *s, double *f) { | |
| 2873 ASSERT (f, (L"")); | |
| 2874 ASSERT (s, (L"")); | |
| 2875 | |
| 2876 CString num; | |
| 2877 while (*s && !String_IsDigit (*s)) s++; | |
| 2878 while (*s && (*s == '.' || String_IsDigit (*s))) { num += *s; s++; } | |
| 2879 ASSERT (num.GetLength(), (L"")); | |
| 2880 *f = String_StringToDouble (num); | |
| 2881 return s; | |
| 2882 } | |
| 2883 | |
| 2884 TCHAR *String_PathFindExtension(const TCHAR *path) { | |
| 2885 ASSERT(path, (L"")); | |
| 2886 | |
| 2887 // Documentation says PathFindExtension string must be of max length | |
| 2888 // MAX_PATH but a trusted tester hit the ASSERT and we don't really | |
| 2889 // need it here, so commented out. We can't address where it is | |
| 2890 // called because it's called from ATL code. | |
| 2891 // ASSERT(lstrlen(path)<=MAX_PATH, (L"")); | |
| 2892 | |
| 2893 // point to terminating NULL | |
| 2894 const TCHAR *ret = path + lstrlen(path); | |
| 2895 const TCHAR *pos = ret; | |
| 2896 | |
| 2897 while (--pos >= path) { | |
| 2898 if (*pos == '.') | |
| 2899 return const_cast<TCHAR *>(pos); | |
| 2900 } | |
| 2901 | |
| 2902 return const_cast<TCHAR *>(ret); | |
| 2903 } | |
| 2904 | |
// Lower-cases an ASCII upper-case letter ('A'-'Z'); any other character is
// returned unchanged.
char String_ToLowerCharAnsi(char c) {
  const bool is_upper = !(c < 'A' || c > 'Z');
  return is_upper ? static_cast<char>(c + ('a' - 'A')) : c;
}
| 2909 | |
| 2910 int String_ToLowerChar(int c) { | |
| 2911 // If it's < 128, then convert is ourself, which is far cheaper than the syste
m conversion | |
| 2912 if (c < 128) | |
| 2913 return String_ToLowerCharAnsi(static_cast<char>(c)); | |
| 2914 | |
| 2915 return Char_ToLower(static_cast<TCHAR>(c)); | |
| 2916 } | |
| 2917 | |
| 2918 | |
| 2919 bool String_PathRemoveFileSpec(TCHAR *path) { | |
| 2920 ASSERT (path, (L"")); | |
| 2921 | |
| 2922 int len, pos; | |
| 2923 len = pos = lstrlen (path); | |
| 2924 | |
| 2925 // You might think that the SHLWAPI API does not change "c:\windows" -> "c:\" | |
| 2926 // when c:\windows is a directory, but it does. | |
| 2927 | |
| 2928 // If we don't want to match this weird API we can use the following to check | |
| 2929 // for directories: | |
| 2930 | |
| 2931 // Check if we are already a directory. | |
| 2932 WIN32_FILE_ATTRIBUTE_DATA attrs; | |
| 2933 // Failure (if file does not exist) is OK. | |
| 2934 BOOL success = GetFileAttributesEx(path, GetFileExInfoStandard, &attrs); | |
| 2935 UTIL_LOG(L4, (_T("[String_PathRemoveFileSpec][path %s][success %d][dir %d]"), | |
| 2936 path, | |
| 2937 success, | |
| 2938 attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)); | |
| 2939 if (success && (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { | |
| 2940 // Remove trailing backslash, if any. | |
| 2941 if (path[pos-1] == '\\') | |
| 2942 path[pos-1] = '\0'; | |
| 2943 return 1; | |
| 2944 } | |
| 2945 | |
| 2946 // Find last backslash. | |
| 2947 while (pos && path[pos] != '\\') pos--; | |
| 2948 if (!pos && path[pos] != '\\') return 0; | |
| 2949 | |
| 2950 ASSERT (pos < len, (L"")); | |
| 2951 | |
| 2952 // The documentation says it removes backslash but it doesn't for c:\. | |
| 2953 if (!pos || path[pos-1] == ':' || (pos == 1 && path[0] == '\\')) | |
| 2954 // Keep the backslash in this case. | |
| 2955 path[pos+1] = '\0'; | |
| 2956 else | |
| 2957 path[pos] = '\0'; | |
| 2958 | |
| 2959 return 1; | |
| 2960 } | |
| 2961 | |
| 2962 void String_EndWithChar(TCHAR *str, TCHAR c) { | |
| 2963 ASSERT (str, (L"")); | |
| 2964 int len = lstrlen(str); | |
| 2965 if (len == 0 || str[len - 1] != c) { | |
| 2966 str[len] = c; | |
| 2967 str[len + 1] = 0; | |
| 2968 } | |
| 2969 } | |
| 2970 | |
| 2971 bool StartsWithBOM(const TCHAR* string) { | |
| 2972 ASSERT(string, (L"")); | |
| 2973 wchar_t c = string[0]; | |
| 2974 if (c == 0xFFFE || c == 0xFEFF) | |
| 2975 return true; | |
| 2976 else | |
| 2977 return false; | |
| 2978 } | |
| 2979 | |
| 2980 const TCHAR* StringAfterBOM(const TCHAR* string) { | |
| 2981 ASSERT(string, (L"")); | |
| 2982 return &string[StartsWithBOM(string) ? 1 : 0]; | |
| 2983 } | |
| 2984 | |
| 2985 bool String_StringToDecimalIntChecked(const TCHAR* str, int* value) { | |
| 2986 ASSERT1(str); | |
| 2987 ASSERT1(value); | |
| 2988 | |
| 2989 if (_set_errno(0)) { | |
| 2990 return false; | |
| 2991 } | |
| 2992 | |
| 2993 TCHAR* end_ptr = NULL; | |
| 2994 *value = _tcstol(str, &end_ptr, 10); | |
| 2995 ASSERT1(end_ptr); | |
| 2996 | |
| 2997 if (errno) { | |
| 2998 ASSERT1(ERANGE == errno); | |
| 2999 // Overflow or underflow. | |
| 3000 return false; | |
| 3001 } else if (*value == 0) { | |
| 3002 // The value returned could be an error code. tcsltol returns | |
| 3003 // zero when it cannot convert the string. However we need to | |
| 3004 // distinguish a real zero. Thus check to see if end_ptr is not the start | |
| 3005 // of the string (str is not an empty string) and is pointing to a '\0'. | |
| 3006 // If not, we have an error. | |
| 3007 if ((str == end_ptr) || (*end_ptr != '\0')) { | |
| 3008 return false; | |
| 3009 } | |
| 3010 } else if (*end_ptr != '\0') { | |
| 3011 // The end_ptr is pointing at a character that is | |
| 3012 // not the end of the string. Only part of the string could be converted. | |
| 3013 return false; | |
| 3014 } | |
| 3015 | |
| 3016 return true; | |
| 3017 } | |
| 3018 | |
| 3019 bool CLSIDToCString(const GUID& guid, CString* str) { | |
| 3020 ASSERT(str, (L"")); | |
| 3021 | |
| 3022 LPOLESTR string_guid = NULL; | |
| 3023 if (::StringFromCLSID(guid, &string_guid) != S_OK) { | |
| 3024 return false; | |
| 3025 } | |
| 3026 *str = string_guid; | |
| 3027 ::CoTaskMemFree(string_guid); | |
| 3028 | |
| 3029 return true; | |
| 3030 } | |
| 3031 | |
| 3032 HRESULT String_StringToBool(const TCHAR* str, bool* value) { | |
| 3033 ASSERT1(str); | |
| 3034 ASSERT1(value); | |
| 3035 | |
| 3036 // This method now performs a case-insentitive | |
| 3037 // culture aware compare. We should however be ok as we are only comparing | |
| 3038 // latin characters. | |
| 3039 if (_tcsicmp(kFalse, str) == 0) { | |
| 3040 *value = false; | |
| 3041 } else if (_tcsicmp(kTrue, str) == 0) { | |
| 3042 *value = true; | |
| 3043 } else { | |
| 3044 // we found another string. should error out. | |
| 3045 return E_FAIL; | |
| 3046 } | |
| 3047 return S_OK; | |
| 3048 } | |
| 3049 | |
| 3050 HRESULT String_BoolToString(bool value, CString* string) { | |
| 3051 ASSERT1(string); | |
| 3052 *string = value ? kTrue : kFalse; | |
| 3053 return S_OK; | |
| 3054 } | |
| 3055 | |
| 3056 CString String_ReplaceIgnoreCase(const CString& string, | |
| 3057 const CString& token, | |
| 3058 const CString& replacement) { | |
| 3059 int token_length = token.GetLength(); | |
| 3060 if (!token_length) { | |
| 3061 return string; | |
| 3062 } | |
| 3063 | |
| 3064 CString string_lowercase(string); | |
| 3065 CString token_lowercase(token); | |
| 3066 string_lowercase.MakeLower(); | |
| 3067 token_lowercase.MakeLower(); | |
| 3068 | |
| 3069 CString output(string); | |
| 3070 int replacement_length = replacement.GetLength(); | |
| 3071 | |
| 3072 int index = 0; | |
| 3073 int output_index = 0; | |
| 3074 | |
| 3075 for (int new_index = 0; | |
| 3076 (new_index = string_lowercase.Find(token_lowercase, index)) != -1; | |
| 3077 index = new_index + token_length) { | |
| 3078 output_index += new_index - index; | |
| 3079 output.Delete(output_index, token_length); | |
| 3080 output.Insert(output_index, replacement); | |
| 3081 output_index += replacement_length; | |
| 3082 } | |
| 3083 | |
| 3084 return output; | |
| 3085 } | |
| 3086 | |
| 3087 // Escape and unescape strings (shlwapi-based implementation). | |
| 3088 // The intended usage for these APIs is escaping strings to make up | |
| 3089 // URLs, for example building query strings. | |
| 3090 // | |
| 3091 // Pass false to the flag segment_only to escape the url. This will not | |
| 3092 // cause the conversion of the # (%23), ? (%3F), and / (%2F) characters. | |
| 3093 | |
| 3094 // Characters that must be encoded include any characters that have no | |
| 3095 // corresponding graphic character in the US-ASCII coded character | |
| 3096 // set (hexadecimal 80-FF, which are not used in the US-ASCII coded character | |
| 3097 // set, and hexadecimal 00-1F and 7F, which are control characters), | |
| 3098 // blank spaces, "%" (which is used to encode other characters), | |
| 3099 // and unsafe characters (<, >, ", #, {, }, |, \, ^, ~, [, ], and '). | |
| 3100 // | |
| 3101 // The input and output strings can't be longer than INTERNET_MAX_URL_LENGTH | |
| 3102 | |
| 3103 HRESULT StringEscape(const CString& str_in, | |
| 3104 bool segment_only, | |
| 3105 CString* str_out) { | |
| 3106 ASSERT1(str_out); | |
| 3107 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH); | |
| 3108 | |
| 3109 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1; | |
| 3110 HRESULT hr = ::UrlEscape(str_in, str_out->GetBufferSetLength(buf_len), &buf_le
n, | |
| 3111 segment_only ? URL_ESCAPE_PERCENT | URL_ESCAPE_SEGMENT_ONLY : URL_ESCAPE_PER
CENT); | |
| 3112 if (SUCCEEDED(hr)) { | |
| 3113 str_out->ReleaseBuffer(); | |
| 3114 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
| 3115 } | |
| 3116 return hr; | |
| 3117 } | |
| 3118 | |
| 3119 HRESULT StringUnescape(const CString& str_in, CString* str_out) { | |
| 3120 ASSERT1(str_out); | |
| 3121 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH); | |
| 3122 | |
| 3123 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1; | |
| 3124 HRESULT hr = ::UrlUnescape(const_cast<TCHAR*>(str_in.GetString()), | |
| 3125 str_out->GetBufferSetLength(buf_len), &buf_len, 0); | |
| 3126 if (SUCCEEDED(hr)) { | |
| 3127 str_out->ReleaseBuffer(buf_len + 1); | |
| 3128 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
| 3129 } | |
| 3130 return hr; | |
| 3131 } | |
| 3132 | |
| 3133 bool String_StringToTristate(const TCHAR* str, Tristate* value) { | |
| 3134 ASSERT1(str); | |
| 3135 ASSERT1(value); | |
| 3136 | |
| 3137 int numerical_value = 0; | |
| 3138 if (!String_StringToDecimalIntChecked(str, &numerical_value)) { | |
| 3139 return false; | |
| 3140 } | |
| 3141 | |
| 3142 switch (numerical_value) { | |
| 3143 case 0: | |
| 3144 *value = TRISTATE_FALSE; | |
| 3145 break; | |
| 3146 case 1: | |
| 3147 *value = TRISTATE_TRUE; | |
| 3148 break; | |
| 3149 case 2: | |
| 3150 *value = TRISTATE_NONE; | |
| 3151 break; | |
| 3152 default: | |
| 3153 return false; | |
| 3154 } | |
| 3155 | |
| 3156 return true; | |
| 3157 } | |
| 3158 | |
| 3159 // Extracts the name and value from a string that contains a name/value pair. | |
| 3160 bool ParseNameValuePair(const CString& token, | |
| 3161 TCHAR separator, | |
| 3162 CString* name, | |
| 3163 CString* value) { | |
| 3164 ASSERT1(name); | |
| 3165 ASSERT1(value); | |
| 3166 | |
| 3167 int separator_index = token.Find(separator); | |
| 3168 if ((separator_index == -1) || // Not a name-value pair. | |
| 3169 (separator_index == 0) || // No name was supplied. | |
| 3170 (separator_index == (token.GetLength() - 1))) { // No value was supplied. | |
| 3171 return false; | |
| 3172 } | |
| 3173 | |
| 3174 *name = token.Left(separator_index); | |
| 3175 *value = token.Right(token.GetLength() - separator_index - 1); | |
| 3176 | |
| 3177 ASSERT1(token.GetLength() == name->GetLength() + value->GetLength() + 1); | |
| 3178 | |
| 3179 // It's not possible for the name to contain the separator. | |
| 3180 ASSERT1(-1 == name->Find(separator)); | |
| 3181 if (-1 != value->Find(separator)) { | |
| 3182 // The value contains the separator. | |
| 3183 return false; | |
| 3184 } | |
| 3185 | |
| 3186 return true; | |
| 3187 } | |
| 3188 | |
| 3189 bool SplitCommandLineInPlace(TCHAR *command_line, | |
| 3190 TCHAR **first_argument_parameter, | |
| 3191 TCHAR **remaining_arguments_parameter) { | |
| 3192 if (!command_line || | |
| 3193 !first_argument_parameter || | |
| 3194 !remaining_arguments_parameter) { | |
| 3195 return false; | |
| 3196 } | |
| 3197 | |
| 3198 TCHAR end_char; | |
| 3199 TCHAR *&first_argument = *first_argument_parameter; | |
| 3200 TCHAR *&remaining_arguments = *remaining_arguments_parameter; | |
| 3201 if (_T('\"') == *command_line) { | |
| 3202 end_char = _T('\"'); | |
| 3203 first_argument = remaining_arguments = command_line + 1; | |
| 3204 } else { | |
| 3205 end_char = _T(' '); | |
| 3206 first_argument = remaining_arguments = command_line; | |
| 3207 } | |
| 3208 // Search for the end of the first argument | |
| 3209 while (end_char != *remaining_arguments && '\0' != *remaining_arguments) { | |
| 3210 ++remaining_arguments; | |
| 3211 } | |
| 3212 if (end_char == *remaining_arguments) { | |
| 3213 *remaining_arguments = '\0'; | |
| 3214 do { | |
| 3215 // Skip the spaces between the first argument and the remaining arguments. | |
| 3216 ++remaining_arguments; | |
| 3217 } while (_T(' ') == *remaining_arguments); | |
| 3218 } | |
| 3219 return true; | |
| 3220 } | |
| 3221 | |
| 3222 bool ContainsOnlyAsciiChars(const CString& str) { | |
| 3223 for (int i = 0; i < str.GetLength(); ++i) { | |
| 3224 if (str[i] > 0x7F) { | |
| 3225 return false; | |
| 3226 } | |
| 3227 } | |
| 3228 return true; | |
| 3229 } | |
| 3230 CString BytesToHex(const uint8* bytes, size_t num_bytes) { | |
| 3231 CString result; | |
| 3232 if (bytes) { | |
| 3233 result.Preallocate(num_bytes * sizeof(TCHAR)); | |
| 3234 static const TCHAR* const kHexChars = _T("0123456789abcdef"); | |
| 3235 for (size_t i = 0; i != num_bytes; ++i) { | |
| 3236 result.AppendChar(kHexChars[(bytes[i] >> 4)]); | |
| 3237 result.AppendChar(kHexChars[(bytes[i] & 0xf)]); | |
| 3238 } | |
| 3239 } | |
| 3240 return result; | |
| 3241 } | |
| 3242 | |
| 3243 CString BytesToHex(const std::vector<uint8>& bytes) { | |
| 3244 CString result; | |
| 3245 if (!bytes.empty()) { | |
| 3246 result.SetString(BytesToHex(&bytes.front(), bytes.size())); | |
| 3247 } | |
| 3248 return result; | |
| 3249 } | |
| 3250 | |
| 3251 void JoinStrings(const std::vector<CString>& components, | |
| 3252 const TCHAR* delim, | |
| 3253 CString* result) { | |
| 3254 ASSERT1(result); | |
| 3255 result->Empty(); | |
| 3256 | |
| 3257 // Compute length so we can reserve memory. | |
| 3258 size_t length = 0; | |
| 3259 size_t delim_length = delim ? _tcslen(delim) : 0; | |
| 3260 for (size_t i = 0; i != components.size(); ++i) { | |
| 3261 if (i != 0) { | |
| 3262 length += delim_length; | |
| 3263 } | |
| 3264 length += components[i].GetLength(); | |
| 3265 } | |
| 3266 | |
| 3267 result->Preallocate(length); | |
| 3268 | |
| 3269 for (size_t i = 0; i != components.size(); ++i) { | |
| 3270 if (i != 0 && delim) { | |
| 3271 result->Append(delim, delim_length); | |
| 3272 } | |
| 3273 result->Append(components[i]); | |
| 3274 } | |
| 3275 } | |
| 3276 | |
| 3277 void JoinStringsInArray(const TCHAR* components[], | |
| 3278 int num_components, | |
| 3279 const TCHAR* delim, | |
| 3280 CString* result) { | |
| 3281 ASSERT1(result); | |
| 3282 result->Empty(); | |
| 3283 | |
| 3284 for (int i = 0; i != num_components; ++i) { | |
| 3285 if (i != 0 && delim) { | |
| 3286 result->Append(delim); | |
| 3287 } | |
| 3288 if (components[i]) { | |
| 3289 result->Append(components[i]); | |
| 3290 } | |
| 3291 } | |
| 3292 } | |
| 3293 | |
| 3294 CString FormatResourceMessage(uint32 resource_id, ...) { | |
| 3295 CString format; | |
| 3296 const bool is_loaded = !!format.LoadString(resource_id); | |
| 3297 | |
| 3298 if (!is_loaded) { | |
| 3299 return CString(); | |
| 3300 } | |
| 3301 | |
| 3302 va_list arg_list; | |
| 3303 va_start(arg_list, resource_id); | |
| 3304 | |
| 3305 CString formatted; | |
| 3306 formatted.FormatMessageV(format, &arg_list); | |
| 3307 | |
| 3308 va_end(arg_list); | |
| 3309 | |
| 3310 return formatted; | |
| 3311 } | |
| 3312 | |
| 3313 CString FormatErrorCode(DWORD error_code) { | |
| 3314 CString error_code_string; | |
| 3315 if (FAILED(error_code)) { | |
| 3316 error_code_string.Format(_T("0x%08x"), error_code); | |
| 3317 } else { | |
| 3318 error_code_string.Format(_T("%u"), error_code); | |
| 3319 } | |
| 3320 return error_code_string; | |
| 3321 } | |
| 3322 | |
| 3323 HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out) { | |
| 3324 ASSERT1(out); | |
| 3325 | |
| 3326 out->Empty(); | |
| 3327 if (str.IsEmpty()) { | |
| 3328 return S_OK; | |
| 3329 } | |
| 3330 | |
| 3331 // Utf8 encode the Utf16 string first. Next urlencode it. | |
| 3332 CStringA utf8str = WideToUtf8(str); | |
| 3333 ASSERT1(!utf8str.IsEmpty()); | |
| 3334 DWORD buf_len = INTERNET_MAX_URL_LENGTH; | |
| 3335 CStringA escaped_utf8_name; | |
| 3336 HRESULT hr = ::UrlEscapeA(utf8str, | |
| 3337 CStrBufA(escaped_utf8_name, buf_len), | |
| 3338 &buf_len, | |
| 3339 0); | |
| 3340 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
| 3341 ASSERT1(escaped_utf8_name.GetLength() == static_cast<int>(buf_len)); | |
| 3342 if (FAILED(hr)) { | |
| 3343 UTIL_LOG(LE, (_T("[UrlEscapeA failed][0x%08x]"), hr)); | |
| 3344 return hr; | |
| 3345 } | |
| 3346 | |
| 3347 *out = CString(escaped_utf8_name); | |
| 3348 return S_OK; | |
| 3349 } | |
| 3350 | |
| 3351 HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out) { | |
| 3352 ASSERT1(out); | |
| 3353 | |
| 3354 out->Empty(); | |
| 3355 if (str.IsEmpty()) { | |
| 3356 return S_OK; | |
| 3357 } | |
| 3358 | |
| 3359 // The value is a utf8 encoded url escaped string that is stored as a | |
| 3360 // unicode string. Because of this, it should contain only ascii chars. | |
| 3361 if (!ContainsOnlyAsciiChars(str)) { | |
| 3362 UTIL_LOG(LE, (_T("[String contains non ascii chars]"))); | |
| 3363 return E_INVALIDARG; | |
| 3364 } | |
| 3365 | |
| 3366 CStringA escaped_utf8_val = WideToAnsiDirect(str); | |
| 3367 DWORD buf_len = INTERNET_MAX_URL_LENGTH; | |
| 3368 CStringA unescaped_val; | |
| 3369 HRESULT hr = ::UrlUnescapeA(const_cast<char*>(escaped_utf8_val.GetString()), | |
| 3370 CStrBufA(unescaped_val, buf_len), | |
| 3371 &buf_len, | |
| 3372 0); | |
| 3373 ASSERT1(unescaped_val.GetLength() == static_cast<int>(buf_len)); | |
| 3374 if (FAILED(hr)) { | |
| 3375 UTIL_LOG(LE, (_T("[UrlUnescapeA failed][0x%08x]"), hr)); | |
| 3376 return hr; | |
| 3377 } | |
| 3378 ASSERT1(buf_len == static_cast<DWORD>(unescaped_val.GetLength())); | |
| 3379 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
| 3380 CString app_name = Utf8ToWideChar(unescaped_val, | |
| 3381 unescaped_val.GetLength()); | |
| 3382 if (app_name.IsEmpty()) { | |
| 3383 return E_INVALIDARG; | |
| 3384 } | |
| 3385 | |
| 3386 *out = app_name; | |
| 3387 return S_OK; | |
| 3388 } | |
| 3389 | |
| 3390 } // namespace omaha | |
| 3391 | |
| OLD | NEW |