Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(698)

Side by Side Diff: base/string.cc

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/string.h ('k') | base/string_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2003-2009 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // ========================================================================
15
16 #include "omaha/base/string.h"
17
18 #include <wininet.h> // For INTERNET_MAX_URL_LENGTH.
19 #include <algorithm>
20 #include <cstdlib>
21 #include "base/scoped_ptr.h"
22 #include "omaha/base/commontypes.h"
23 #include "omaha/base/debug.h"
24 #include "omaha/base/localization.h"
25 #include "omaha/base/logging.h"
26
27 namespace omaha {
28
29 namespace {
30 // Testing shows that only the following ASCII characters are
31 // considered spaces by GetStringTypeA: 9-13, 32, 160.
32 // Rather than call GetStringTypeA with no locale, as we used to,
33 // we look up the values directly in a precomputed array.
34
35 SELECTANY byte spaces[256] = {
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0-9
37 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 10-19
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29
39 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 30-39
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-69
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 150-159
52 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160-169
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 170-179
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 180-189
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 190-199
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 200-209
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 210-219
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 220-229
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 230-239
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 240-249
61 0, 0, 0, 0, 0, 1, // 250-255
62 };
63 } // namespace
64
65 const TCHAR* const kFalse = _T("false");
66 const TCHAR* const kTrue = _T("true");
67
68 bool IsSpaceW(WCHAR c) {
69 // GetStringTypeW considers these characters to be spaces:
70 // 9-13, 32, 133, 160, 5760, 8192-8203, 8232, 8233, 12288
71 if (c < 256)
72 return (c == 133 || IsSpaceA((char) (c & 0xff)));
73
74 return (c >= 8192 && c <= 8203) || c == 8232 ||
75 c == 8233 || c == 12288;
76 }
77
78 bool IsSpaceA(char c) {
79 return spaces[static_cast<unsigned char>(c)] == 1;
80 }
81
82 int TrimCString(CString &s) {
83 int len = Trim(s.GetBuffer());
84 s.ReleaseBufferSetLength(len);
85 return len;
86 }
87
88 void MakeLowerCString(CString & s) {
89 int len = s.GetLength();
90 String_FastToLower(s.GetBuffer());
91 s.ReleaseBufferSetLength(len);
92 }
93
94 int Trim(TCHAR *s) {
95 ASSERT(s, (L""));
96
97 // First find end of leading spaces
98 TCHAR *start = s;
99 while (*start) {
100 if (!IsSpace(*start))
101 break;
102 ++start;
103 }
104
105 // Now search for the end, remembering the start of the last spaces
106 TCHAR *end = start;
107 TCHAR *last_space = end;
108 while (*end) {
109 if (!IsSpace(*end))
110 last_space = end + 1;
111 ++end;
112 }
113
114 // Copy the part we want
115 int len = last_space - start;
116 // lint -e{802} Conceivably passing a NULL pointer
117 memmove(s, start, len * sizeof(TCHAR));
118
119 // 0 terminate
120 s[len] = 0;
121
122 return len;
123 }
124
125 void TrimString(CString& s, const TCHAR* delimiters) {
126 s = s.Trim(delimiters);
127 }
128
129 // Strip the first token from the front of argument s. A token is a
130 // series of consecutive non-blank characters - unless the first
131 // character is a double-quote ("), in that case the token is the full
132 // quoted string
133 CString StripFirstQuotedToken(const CString& s) {
134 const int npos = -1;
135
136 // Make a writeable copy
137 CString str(s);
138
139 // Trim any surrounding blanks (and tabs, for the heck of it)
140 TrimString(str, L" \t");
141
142 // Too short to have a second token
143 if (str.GetLength() <= 1)
144 return L"";
145
146 // What kind of token are we stripping?
147 if (str[0] == L'\"') {
148 // Remove leading quoting string
149 int i = str.Find(L"\"", 1);
150 if (i != npos)
151 i++;
152 return str.Mid(i);
153 } else {
154 // Remove leading token
155 int i = str.FindOneOf(L" \t");
156 if (i != npos)
157 i++;
158 return str.Mid(i);
159 }
160 }
161
162 // A block of text to separate lines, and back
163 void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CStrin g>* lines) {
164 ASSERT(delimiter, (L""));
165 ASSERT(lines, (L""));
166
167 size_t delimiter_len = ::lstrlen(delimiter);
168 int b = 0;
169 int e = 0;
170
171 for (b = 0; e != -1 && b < text.GetLength(); b = e + delimiter_len) {
172 e = text.Find(delimiter, b);
173 if (e != -1) {
174 ASSERT1(e - b > 0);
175 lines->push_back(text.Mid(b, e - b));
176 } else {
177 lines->push_back(text.Mid(b));
178 }
179 }
180 }
181
182 void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CStr ing* text) {
183 ASSERT(delimiter, (L""));
184 ASSERT(text, (L""));
185
186 size_t delimiter_len = ::lstrlen(delimiter);
187 size_t len = 0;
188 for (size_t i = 0; i < lines.size(); ++i) {
189 len += lines[i].GetLength() + delimiter_len;
190 }
191 text->Empty();
192 text->Preallocate(len);
193 for (std::vector<CString>::size_type i = 0; i < lines.size(); ++i) {
194 text->Append(lines[i]);
195 if (delimiter_len) {
196 text->Append(delimiter);
197 }
198 }
199 }
200
201 int CleanupWhitespaceCString(CString &s) {
202 int len = CleanupWhitespace(s.GetBuffer());
203 s.ReleaseBufferSetLength(len);
204 return len;
205 }
206
207 int CleanupWhitespace(TCHAR *str) {
208 ASSERT(str, (L""));
209
210 TCHAR *src = str;
211 TCHAR *dest = str;
212 int spaces = 0;
213 bool at_start = true;
214 while (true) {
215 // At end of string?
216 TCHAR c = *src;
217 if (0 == c)
218 break;
219
220 // Look for whitespace; copy it over if not whitespace
221 if (IsSpace(c)) {
222 ++spaces;
223 }
224 else {
225 *dest++ = c;
226 at_start = false;
227 spaces = 0;
228 }
229
230 // Write only first consecutive space (but skip space at start)
231 if (1 == spaces && !at_start)
232 *dest++ = ' ';
233
234 ++src;
235 }
236
237 // Remove trailing space, if any
238 if (dest > str && *(dest - 1) == L' ')
239 --dest;
240
241 // 0-terminate
242 *dest = 0;
243
244 return dest - str;
245 }
246
247 // Take 1 single hexadecimal "digit" (as a character) and return its decimal val ue
248 // Returns -1 if given invalid hex digit
249 int HexDigitToDec(const TCHAR digit) {
250 if (digit >= L'A' && digit <= L'F')
251 return 10 + (digit - L'A');
252 else if (digit >= L'a' && digit <= L'f')
253 return 10 + (digit - L'a');
254 else if (digit >= L'0' && digit <= L'9')
255 return (digit - L'0');
256 else
257 return -1;
258 }
259
260 // Convert the 2 hex chars at positions <pos> and <pos>+1 in <s> to a char (<cha r_out>)
261 // Note: scanf was giving me troubles, so here's the manual version
262 // Extracted char gets written to <char_out>, which must be allocated by
263 // the caller; return true on success or false if parameters are incorrect
264 // or string does not have 2 hex digits at the specified position
265 // NOTE: <char_out> is NOT a string, just a pointer to a char for the result
266 bool ExtractChar(const CString & s, int pos, unsigned char * char_out) {
267 // char_out may be NULL
268
269 if (s.GetLength() < pos + 1) {
270 return false;
271 }
272
273 if (pos < 0 || NULL == char_out) {
274 ASSERT(0, (_T("invalid params: pos<0 or char_out is NULL")));
275 return false;
276 }
277
278 TCHAR c1 = s.GetAt(pos);
279 TCHAR c2 = s.GetAt(pos+1);
280
281 int p1 = HexDigitToDec(c1);
282 int p2 = HexDigitToDec(c2);
283
284 if (p1 == -1 || p2 == -1) {
285 return false;
286 }
287
288 *char_out = (unsigned char)(p1 * 16 + p2);
289 return true;
290 }
291
292 WCHAR *ToWide (const char *s, int len) {
293 ASSERT (s, (L""));
294 WCHAR *w = new WCHAR [len+1]; if (!w) { return NULL; }
295 // int rc = MultiByteToWideChar (CP_ACP, 0, s.GetString(), (int)s.GetLength( )+1, w, s.GetLength()+1);
296 // TODO(omaha): why would it ever be the case that rc > len?
297 int rc = MultiByteToWideChar (CP_ACP, 0, s, len, w, len);
298 if (rc > len) { delete [] w; return NULL; }
299 // ASSERT (rc <= len, (L""));
300 w[rc]=L'\0';
301 return w;
302 }
303
304 const byte *BufferContains (const byte *buf, uint32 buf_len, const byte *data, u int32 data_len) {
305 ASSERT(data, (L""));
306 ASSERT(buf, (L""));
307
308 for (uint32 i = 0; i < buf_len; i++) {
309 uint32 j = i;
310 uint32 k = 0;
311 uint32 len = 0;
312 while (j < buf_len && k < data_len && buf[j++] == data[k++]) { len++; }
313 if (len == data_len) { return buf + i; }
314 }
315 return 0;
316 }
317
318 // Converting the Ansi Multibyte String into unicode string. The multibyte
319 // string is encoded using the specified codepage.
320 // The code is pretty much like the U2W function, except the codepage can be
321 // any valid windows CP.
322 BOOL AnsiToWideString(const char *from, int length, UINT codepage, CString *to) {
323 ASSERT(from, (L""));
324 ASSERT(to, (L""));
325 ASSERT1(length >= -1);
326 // Figure out how long the string is
327 int req_chars = MultiByteToWideChar(codepage, 0, from, length, NULL, 0);
328
329 if (req_chars <= 0) {
330 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));
331 *to = AnsiToWideString(from, length);
332 return FALSE;
333 }
334
335 TCHAR *buffer = to->GetBufferSetLength(req_chars);
336 int conv_chars = MultiByteToWideChar(codepage, 0, from, length, buffer, req_ch ars);
337 if (conv_chars == 0) {
338 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));
339 to->ReleaseBuffer(0);
340 *to = AnsiToWideString(from, length);
341 return FALSE;
342 }
343
344 // Something truly horrible happened.
345 ASSERT (req_chars == conv_chars, (L"MBToWide returned unexpected value: GetLas tError()=%d",GetLastError()));
346 // If length was inferred, conv_chars includes the null terminator.
347 // Adjust the length here to remove null termination,
348 // because we use the length-qualified CString constructor,
349 // which automatically adds null termination given an unterminated array.
350 if (-1 == length) { --conv_chars; }
351 to->ReleaseBuffer(conv_chars);
352 return TRUE;
353 }
354
355 // CStringW(const char* from) did not cast all character properly
356 // so we write our own.
357 CString AnsiToWideString(const char *from, int length) {
358 ASSERT(from, (L""));
359 ASSERT1(length >= -1);
360 if (length < 0)
361 length = strlen(from);
362 CString to;
363 TCHAR *buffer = to.GetBufferSetLength(length);
364 for (int i = 0; i < length; ++i)
365 buffer[i] = static_cast<UINT8>(from[i]);
366 to.ReleaseBuffer(length);
367 return to;
368 }
369
370
371 // Transform a unicode string into UTF8, as represented in an ASCII string
372 CStringA WideToUtf8(const CString& w) {
373 // Add a cutoff. If it's all ascii, convert it directly
374 const TCHAR* input = static_cast<const TCHAR*>(w.GetString());
375 int input_len = w.GetLength(), i;
376 for (i = 0; i < input_len; ++i) {
377 if (input[i] > 127) {
378 break;
379 }
380 }
381
382 // If we made it to the end without breaking, then it's all ANSI, so do a quic k convert
383 if (i == input_len) {
384 return WideToAnsiDirect(w);
385 }
386
387 // Figure out how long the string is
388 int req_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL);
389
390 scoped_array<char> utf8_buffer(new char[req_bytes]);
391
392 int conv_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8_buffer.get(), r eq_bytes, NULL, NULL);
393 ASSERT1(req_bytes == conv_bytes);
394
395 // conv_bytes includes the null terminator, when we read this in, don't read t he terminator
396 CStringA out(utf8_buffer.get(), conv_bytes - 1);
397
398 return out;
399 }
400
401 CString Utf8ToWideChar(const char* utf8, uint32 num_bytes) {
402 ASSERT1(utf8);
403 if (num_bytes == 0) {
404 return CString();
405 }
406
407 uint32 number_of_wide_chars = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_byte s, NULL, 0);
408 number_of_wide_chars += 1; // make room for NULL terminator
409
410 CString ret_string;
411 TCHAR* buffer = ret_string.GetBuffer(number_of_wide_chars);
412 DWORD number_of_characters_copied = ::MultiByteToWideChar(CP_UTF8, 0, utf8, nu m_bytes, buffer, number_of_wide_chars);
413 ASSERT1(number_of_characters_copied == number_of_wide_chars - 1);
414 buffer[number_of_wide_chars - 1] = _T('\0'); // ensure there is a NULL termin ator
415 ret_string.ReleaseBuffer();
416
417 // Strip the byte order marker if there is one in the document.
418 if (ret_string[0] == kUnicodeBom) {
419 ret_string = ret_string.Right(ret_string.GetLength() - 1);
420 }
421
422 if (number_of_characters_copied > 0) {
423 return ret_string;
424 }
425
426 // Failure case
427 return CString();
428 }
429
430 CString Utf8BufferToWideChar(const std::vector<uint8>& buffer) {
431 CString result;
432 if (!buffer.empty()) {
433 result = Utf8ToWideChar(
434 reinterpret_cast<const char*>(&buffer.front()), buffer.size());
435 }
436 return result;
437 }
438
439 CString AbbreviateString (const CString & title, int32 max_len) {
440 ASSERT (max_len, (L""));
441 CString s(title);
442 TrimCString(s); // remove whitespace at start/end
443 if (s.GetLength() > max_len) {
444 s = s.Left (max_len - 2);
445 CString orig(s);
446 // remove partial words
447 while (s.GetLength() > 1 && !IsSpace(s[s.GetLength()-1])) { s = s.Left ( s.GetLength() - 1); }
448 // but not if it would make the string very short
449 if (s.GetLength() < max_len / 2) { s = orig; }
450 s += _T("..");
451 }
452
453 return s;
454 }
455
456 CString GetAbsoluteUri(const CString& uri) {
457 int i = String_FindString(uri, _T("://"));
458 if (i==-1) return uri;
459
460 // add trailing / if none exists
461 int j = String_FindChar(uri, L'/',i+3);
462 if (j==-1) return (uri+NOTRANSL(_T("/")));
463
464 // remove duplicate trailing slashes
465 int len = uri.GetLength();
466 if (len > 1 && uri.GetAt(len-1) == '/' && uri.GetAt(len-2) == '/') {
467 CString new_uri(uri);
468 int new_len = new_uri.GetLength();
469 while (new_len > 1 && new_uri.GetAt(new_len-1) == '/' && new_uri.GetAt(new_l en-2) == '/') {
470 new_len--;
471 new_uri = new_uri.Left(new_len);
472 }
473 return new_uri;
474 }
475 else return uri;
476 }
477
478 // requires that input have a PROTOCOL (http://) for proper behavior
479 // items with the "file" protocol are returned as is (what is the hostname in th at case? C: ? doesn't make sense)
480 // TODO(omaha): loosen requirement
481 // includes http://, e.g. http://www.google.com/
482 CString GetUriHostName(const CString& uri, bool strip_leading) {
483 if (String_StartsWith(uri,NOTRANSL(_T("file:")),true)) return uri;
484
485 // correct any "errors"
486 CString s(GetAbsoluteUri(uri));
487
488 // Strip the leading "www."
489 if (strip_leading)
490 {
491 int index_www = String_FindString(s, kStrLeadingWww);
492 if (index_www != -1)
493 ReplaceCString (s, kStrLeadingWww, _T(""));
494 }
495
496 int i = String_FindString(s, _T("://"));
497 if(i==-1) return uri;
498 int j = String_FindChar(s, L'/',i+3);
499 if(j==-1) return uri;
500 return s.Left(j+1);
501 }
502
503 // requires that input have a PROTOCOL (http://) for proper behavior
504 // TODO(omaha): loosen requirement
505 // removes the http:// and the extra slash '/' at the end.
506 // http://www.google.com/ -> www.google.com (or google.com if strip_leading = tr ue)
507 CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading) {
508 CString s(GetUriHostName(uri,strip_leading));
509
510 // remove protocol
511 int i = String_FindString (s, _T("://"));
512 if(i==-1) return s;
513 CString ss(s.Right (s.GetLength() - i-3));
514
515 // remove the last '/'
516 int j = ss.ReverseFind('/');
517 if (j == -1) return ss;
518 return ss.Left(j);
519 }
520
521 CString AbbreviateUri(const CString& uri, int32 max_len) {
522 ASSERT1(max_len);
523 ASSERT1(!uri.IsEmpty());
524
525 CString s(uri);
526 VERIFY1(String_FindString (s, _T("://")));
527
528 TrimCString(s);
529 // SKIP_LOC_BEGIN
530 RemoveFromStart (s, _T("ftp://"), false);
531 RemoveFromStart (s, _T("http://"), false);
532 RemoveFromStart (s, _T("https://"), false);
533 RemoveFromStart (s, _T("www."), false);
534 RemoveFromStart (s, _T("ftp."), false);
535 RemoveFromStart (s, _T("www-"), false);
536 RemoveFromStart (s, _T("ftp-"), false);
537 RemoveFromEnd (s, _T(".htm"));
538 RemoveFromEnd (s, _T(".html"));
539 RemoveFromEnd (s, _T(".asp"));
540 // SKIP_LOC_END
541 if (s.GetLength() > max_len) {
542 // try to keep the portion after the last /
543 int32 last_slash = s.ReverseFind ((TCHAR)'/');
544 CString after_last_slash;
545 if (last_slash == -1) { after_last_slash = _T(""); }
546 else { after_last_slash = s.Right (uri.GetLength() - last_slash - 1); }
547 if (after_last_slash.GetLength() > max_len / 2) {
548 after_last_slash = after_last_slash.Right (max_len / 2);
549 }
550 s = s.Left (max_len - after_last_slash.GetLength() - 2);
551 s += "..";
552 s += after_last_slash;
553 }
554 return s;
555 }
556
557 // normalized version of a URI intended to map duplicates to the same string
558 // the normalized URI is not a valid URI
559 CString NormalizeUri (const CString & uri) {
560 CString s(uri);
561 TrimCString(s);
562 MakeLowerCString(s);
563 // SKIP_LOC_BEGIN
564 ReplaceCString (s, _T(":80"), _T(""));
565
566 RemoveFromEnd (s, _T("/index.html"));
567 RemoveFromEnd (s, _T("/welcome.html")); // old netscape standard
568 RemoveFromEnd (s, _T("/"));
569
570 RemoveFromStart (s, _T("ftp://"), false);
571 RemoveFromStart (s, _T("http://"), false);
572 RemoveFromStart (s, _T("https://"), false);
573 RemoveFromStart (s, _T("www."), false);
574 RemoveFromStart (s, _T("ftp."), false);
575 RemoveFromStart (s, _T("www-"), false);
576 RemoveFromStart (s, _T("ftp-"), false);
577
578 ReplaceCString (s, _T("/./"), _T("/"));
579 // SKIP_LOC_END
580
581 // TODO(omaha):
582 // fixup URLs like a/b/../../c
583 // while ($s =~ m!\/\.\.\!!) {
584 // $s =~ s!/[^/]*/\.\./!/!;
585 // }
586
587 // TODO(omaha):
588 // unescape characters
589 // Note from RFC1630: "Sequences which start with a percent sign
590 // but are not followed by two hexadecimal characters are reserved
591 // for future extension"
592 // $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
593
594 return s;
595 }
596
597 CString RemoveInternetProtocolHeader (const CString& url) {
598 int find_colon_slash_slash = String_FindString(url, NOTRANSL(L"://"));
599 if( find_colon_slash_slash != -1 ) {
600 // remove PROTOCOL://
601 return url.Right(url.GetLength() - find_colon_slash_slash - 3);
602 } else if (String_StartsWith(url, NOTRANSL(L"mailto:"), true)) {
603 // remove "mailto:"
604 return url.Right(url.GetLength() - 7);
605 } else {
606 // return as is
607 return url;
608 }
609 }
610
611 HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out) {
612 ASSERT1(path_out);
613 ASSERT1(uri.GetLength() < INTERNET_MAX_URL_LENGTH);
614
615 if (uri.IsEmpty()) {
616 return E_INVALIDARG;
617 }
618
619 DWORD buf_len = MAX_PATH;
620 HRESULT hr = ::PathCreateFromUrl(uri,
621 CStrBuf(*path_out, MAX_PATH),
622 &buf_len,
623 NULL);
624 return hr;
625 }
626
627 void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case) {
628 ASSERT(remove, (L""));
629
630 // Remove the characters if it is the prefix
631 if (String_StartsWith(s, remove, ignore_case))
632 s.Delete(0, lstrlen(remove));
633 }
634
635 bool String_EndsWith(const TCHAR *str, const TCHAR *end_str, bool ignore_case) {
636 ASSERT(end_str, (L""));
637 ASSERT(str, (L""));
638
639 int str_len = lstrlen(str);
640 int end_len = lstrlen(end_str);
641
642 // Definitely false if the suffix is longer than the string
643 if (end_len > str_len)
644 return false;
645
646 const TCHAR *str_ptr = str + str_len;
647 const TCHAR *end_ptr = end_str + end_len;
648
649 while (end_ptr >= end_str) {
650 // Check for matching characters
651 TCHAR c1 = *str_ptr;
652 TCHAR c2 = *end_ptr;
653
654 if (ignore_case) {
655 c1 = Char_ToLower(c1);
656 c2 = Char_ToLower(c2);
657 }
658
659 if (c1 != c2)
660 return false;
661
662 --str_ptr;
663 --end_ptr;
664 }
665
666 // if we haven't failed out, it must be ok!
667 return true;
668 }
669
670 CString String_MakeEndWith(const TCHAR* str, const TCHAR* end_str, bool ignore_c ase) {
671 if (String_EndsWith(str, end_str, ignore_case)) {
672 return str;
673 } else {
674 CString r(str);
675 r += end_str;
676 return r;
677 }
678 }
679
680 void RemoveFromEnd (CString & s, const TCHAR* remove) {
681 ASSERT(remove, (L""));
682
683 // If the suffix is shorter than the string, don't bother
684 int remove_len = lstrlen(remove);
685 if (s.GetLength() < remove_len) return;
686
687 // If the suffix is equal
688 int suffix_begin = s.GetLength() - remove_len;
689 if (0 == lstrcmp(s.GetString() + suffix_begin, remove))
690 s.Delete(suffix_begin, remove_len);
691 }
692
693 CString ElideIfNeeded (const CString & input_string, int max_len, int min_len) {
694 ASSERT (min_len <= max_len, (L""));
695 ASSERT (max_len >= TSTR_SIZE(kEllipsis)+1, (L""));
696 ASSERT (min_len >= TSTR_SIZE(kEllipsis)+1, (L""));
697
698 CString s = input_string;
699
700 s.TrimRight();
701 if (s.GetLength() > max_len) {
702 int truncate_at = max_len - TSTR_SIZE(kEllipsis);
703 // find first space going backwards from character one after the truncation point
704 while (truncate_at >= min_len && !IsSpace(s.GetAt(truncate_at)))
705 truncate_at--;
706
707 // skip the space(s)
708 while (truncate_at >= min_len && IsSpace(s.GetAt(truncate_at)))
709 truncate_at--;
710
711 truncate_at++;
712
713 if (truncate_at <= min_len || truncate_at > (max_len - static_cast<int>(TSTR _SIZE(kEllipsis)))) {
714 // we weren't able to break at a word boundary, may as well use more of th e string
715 truncate_at = max_len - TSTR_SIZE(kEllipsis);
716
717 // skip space(s)
718 while (truncate_at > 0 && IsSpace(s.GetAt(truncate_at-1)))
719 truncate_at--;
720 }
721
722 s = s.Left(truncate_at);
723 s += kEllipsis;
724 }
725
726 UTIL_LOG(L6, (L"elide (%d %d) %s -> %s", min_len, max_len, input_string, s));
727 return s;
728 }
729
730 // these functions untested
731 // UTF8 parameter supported on XP/2000 only
732 HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len) {
733 ASSERT (dest_len, (L""));
734 ASSERT (dest, (L""));
735 ASSERT (src, (L""));
736
737 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode
738 // then use WideCharToMultiByte to convert from Unicode to UTF8
739 WCHAR *unicode = new WCHAR [(src_len + 1) * sizeof (TCHAR)]; ASSERT (unicode, (L""));
740 int chars_written = MultiByteToWideChar (CP_ACP, 0, src, src_len, unicode, src _len);
741 ASSERT (chars_written == src_len, (L""));
742 char *unmappable = " ";
743 BOOL unmappable_characters = false;
744 *dest_len = WideCharToMultiByte (CP_UTF8, 0, unicode, chars_written, dest, *de st_len, unmappable, &unmappable_characters);
745 delete [] unicode;
746 return S_OK;
747 }
748
749 // Convert Wide to ANSI directly. Use only when it is all ANSI
750 CStringA WideToAnsiDirect(const CString & in) {
751 int in_len = in.GetLength();
752 const TCHAR * in_buf = static_cast<const TCHAR*>(in.GetString());
753
754 CStringA out;
755 unsigned char * out_buf = (unsigned char *)out.GetBufferSetLength(in_len);
756
757 for(int i = 0; i < in_len; ++i)
758 out_buf[i] = static_cast<unsigned char>(in_buf[i]);
759
760 out.ReleaseBuffer(in_len);
761 return out;
762 }
763
764 HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len) {
765 ASSERT(dest_len, (L""));
766 ASSERT(dest, (L""));
767
768 *dest_len = WideCharToMultiByte (CP_UTF8, 0, src, src_len, dest, *dest_len, NU LL,NULL);
769 return S_OK;
770 }
771
772 HRESULT UTF8ToUCS2 (const char * src, int src_len, LPWSTR dest, int *dest_len) {
773 ASSERT (dest_len, (L""));
774 ASSERT (src, (L""));
775
776 *dest_len = MultiByteToWideChar (CP_UTF8, 0, src, src_len, dest, *dest_len);
777 ASSERT (*dest_len == src_len, (L""));
778 return S_OK;
779 }
780
781 HRESULT UTF8ToAnsi (char * src, int, char * dest, int *dest_len) {
782 ASSERT(dest_len, (L""));
783 ASSERT(dest, (L""));
784 ASSERT(src, (L""));
785
786 src; dest; dest_len; // unreferenced formal parameter
787
788 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode
789 // then use WideCharToMultiByte to convert from Unicode to ANSI
790 return E_FAIL;
791 }
792
793 // clean up a string so it can be included within a JavaScript string
794 // mainly involves escaping characters
795 CString SanitizeString(const CString & in, DWORD mode) {
796 CString out(in);
797
798 if (mode & kSanHtml) {
799 // SKIP_LOC_BEGIN
800 ReplaceCString(out, _T("&"), _T("&amp;"));
801 ReplaceCString(out, _T("<"), _T("&lt;"));
802 ReplaceCString(out, _T(">"), _T("&gt;"));
803 // SKIP_LOC_END
804 }
805
806 if ((mode & kSanXml) == kSanXml) {
807 // SKIP_LOC_BEGIN
808 ReplaceCString(out, _T("'"), _T("&apos;"));
809 ReplaceCString(out, _T("\""), _T("&quot;"));
810 // SKIP_LOC_END
811 }
812
813 // Note that this SAN_JAVASCRIPT and kSanXml should not be used together.
814 ASSERT ((mode & (kSanJs | kSanXml)) != (kSanJs | kSanXml), (L""));
815
816 if ((mode & kSanJs) == kSanJs) {
817 // SKIP_LOC_BEGIN
818 ReplaceCString(out, _T("\\"), _T("\\\\"));
819 ReplaceCString(out, _T("\'"), _T("\\\'"));
820 ReplaceCString(out, _T("\""), _T("\\\""));
821 ReplaceCString(out, _T("\n"), _T(" "));
822 ReplaceCString(out, _T("\t"), _T(" "));
823 // SKIP_LOC_END
824 }
825
826 if ((mode & kSanHtmlInput) == kSanHtmlInput) {
827 // SKIP_LOC_BEGIN
828 ReplaceCString(out, _T("\""), _T("&quot;"));
829 ReplaceCString(out, _T("'"), _T("&#39;"));
830 // SKIP_LOC_END
831 }
832
833 return out;
834 }
835
836 // Bolds the periods used for abbreviation. Call this after HighlightTerms.
837 CString BoldAbbreviationPeriods(const CString & in) {
838 CString out(in);
839 CString abbrev;
840 for (int i = 0; i < kAbbreviationPeriodLength; ++i)
841 abbrev += _T(".");
842 ReplaceCString(out, abbrev, NOTRANSL(_T("<b>")) + abbrev + NOTRANSL(_T("</b>") ));
843 return out;
844 }
845
846 // Unescape a escaped sequence leading by a percentage symbol '%',
847 // and converted the unescaped sequence (in UTF8) into unicode.
848 // Inputs: src is the input string.
849 // pos is the starting position.
850 // Returns: true if a EOS(null) char was encounted.
851 // out contains the unescaped and converted unicode string.
852 // consumed_length is how many bytes in the src string have been
853 // unescaped.
854 // We can avoid the expensive UTF8 conversion step if there are no higher
855 // ansi characters So if there aren't any, just convert it ANSI-to-WIDE
856 // directly, which is cheaper.
857 inline bool UnescapeSequence(const CString &src, int pos,
858 CStringW *out, int *consumed_length) {
859 ASSERT1(out);
860 ASSERT1(consumed_length);
861
862 int length = src.GetLength();
863 // (input_len - pos) / 3 is enough for un-escaping the (%xx)+ sequences.
864 int max_dst_length = (length - pos) / 3;
865 scoped_array<char> unescaped(new char[max_dst_length]);
866 char *buf = unescaped.get();
867 if (buf == NULL) { // no enough space ???
868 *consumed_length = 0;
869 return false;
870 }
871 char *dst = buf;
872 bool is_utf8 = false;
873 // It is possible that there is a null character '\0' in the sequence.
874 // Because the CStringT does't support '\0' in it, we stop
875 // parsing the input string when it is encounted.
876 bool eos_encounted = false;
877 uint8 ch;
878 int s = pos;
879 while (s + 2 < length && src[s] == '%' && !eos_encounted &&
880 ExtractChar(src, s + 1, &ch)) {
881 if (ch != 0)
882 *dst++ = ch;
883 else
884 eos_encounted = true;
885 if (ch >= 128)
886 is_utf8 = true;
887 s += 3;
888 }
889
890 ASSERT1(dst <= buf + max_dst_length); // just to make sure
891
892 *consumed_length = s - pos;
893 if (is_utf8)
894 AnsiToWideString(buf, dst - buf, CP_UTF8, out);
895 else
896 *out = AnsiToWideString(buf, dst - buf);
897 return eos_encounted;
898 }
899
900 // There is an encoding called "URL-encoding". This function takes a URL-encoded string
901 // and converts it back to the original representation
902 // example: "?q=moon+doggy_%25%5E%26&" = "moon doggy_%^&"
903 CString Unencode(const CString &input) {
904 const int input_len = input.GetLength();
905 const TCHAR *src = input.GetString();
906 // input_len is enough for containing the unencoded string.
907 CString out;
908 TCHAR *head = out.GetBuffer(input_len);
909 TCHAR *dst = head;
910 int s = 0;
911 bool eos_encounted = false;
912 bool is_utf8 = false;
913 CStringW fragment;
914 int consumed_length = 0;
915 while (s < input_len && !eos_encounted) {
916 switch (src[s]) {
917 case '+' :
918 *dst++ = ' ';
919 ASSERT1(dst <= head + input_len);
920 ++s;
921 break;
922 case '%' :
923 eos_encounted =
924 UnescapeSequence(input, s, &fragment, &consumed_length);
925 if (consumed_length > 0) {
926 s += consumed_length;
927 ASSERT1(dst + fragment.GetLength() <= head + input_len);
928 for (int i = 0; i < fragment.GetLength(); ++i)
929 *dst++ = fragment[i];
930 } else {
931 *dst++ = src[s++];
932 ASSERT1(dst <= head + input_len);
933 }
934 break;
935 default:
936 *dst++ = src[s];
937 ASSERT1(dst <= head + input_len);
938 ++s;
939 }
940 }
941 int out_len = dst - head;
942 out.ReleaseBuffer(out_len);
943 return out;
944 }
945
946 CString GetTextInbetween(const CString &input, const CString &start, const CStri ng &end) {
947 int start_index = String_FindString(input, start);
948 if (start_index == -1)
949 return L"";
950
951 start_index += start.GetLength();
952 int end_index = String_FindString(input, end, start_index);
953 if (end_index == -1)
954 return L"";
955
956 return input.Mid(start_index, end_index - start_index);
957 }
958
959 // Given a string, get the parameter and url-unencode it
960 CString GetParam(const CString & input, const CString & key) {
961 CString my_key(_T("?"));
962 my_key.Append(key);
963 my_key += L'=';
964
965 return Unencode(GetTextInbetween(input, my_key, NOTRANSL(L"?")));
966 }
967
968 // Get an xml-like field from a string
969 CString GetField (const CString & input, const CString & field) {
970 CString start_field(NOTRANSL(_T("<")));
971 start_field += field;
972 start_field += L'>';
973
974 int32 start = String_FindString(input, start_field);
975 if (start == -1) { return _T(""); }
976 start += 2 + lstrlen (field);
977
978 CString end_field(NOTRANSL(_T("</")));
979 end_field += field;
980 end_field += L'>';
981
982 int32 end = String_FindString(input, end_field);
983 if (end == -1) { return _T(""); }
984
985 return input.Mid (start, end - start);
986 }
987
988 // ------------------------------------------------------------
989 // Finds a whole word match in the query.
990 // If the word has non-spaces either before or after, it will not qualify as
991 // a match. i.e. "pie!" is not a match because of the exclamation point.
992 // TODO(omaha): Add parameter that will consider punctuation acceptable.
993 //
994 // Optionally will look for a colon at the end.
995 // If not found, return -1.
996 int FindWholeWordMatch (const CString &query,
997 const CString &word_to_match,
998 const bool end_with_colon,
999 const int index_begin) {
1000 if (word_to_match.IsEmpty()) {
1001 return -1;
1002 }
1003
1004 int index_word_begin = index_begin;
1005
1006 // Keep going until we find a whole word match, or the string ends.
1007 do {
1008 index_word_begin = String_FindString (query, word_to_match, index_word_begin );
1009
1010 if (-1 == index_word_begin) {
1011 return index_word_begin;
1012 }
1013
1014 // If it's not a whole word match, keep going.
1015 if (index_word_begin > 0 &&
1016 !IsSpaceW (query[index_word_begin - 1])) {
1017 goto LoopEnd;
1018 }
1019
1020 if (end_with_colon) {
1021 int index_colon = String_FindChar (query, L':', index_word_begin);
1022
1023 // If there is no colon in the string, return now.
1024 if (-1 == index_colon) {
1025 return -1;
1026 }
1027
1028 // If there is text between the end of the word and the colon, keep going.
1029 if (index_colon - index_word_begin != word_to_match.GetLength()) {
1030 goto LoopEnd;
1031 }
1032 } else {
1033 // If there are more chars left after this word/phrase, and
1034 // they are not spaces, return.
1035 if (query.GetLength() > index_word_begin + word_to_match.GetLength() &&
1036 !IsSpaceW (query.GetAt (index_word_begin + word_to_match.GetLength()))) {
1037 goto LoopEnd;
1038 }
1039 }
1040
1041 // It fits all the requirements, so return the index to the beginning of the word.
1042 return index_word_begin;
1043
1044 LoopEnd:
1045 ++index_word_begin;
1046
1047 } while (-1 != index_word_begin);
1048
1049 return index_word_begin;
1050 }
1051
1052 // --------------------------------------------------------
1053 // Do whole-word replacement in "str".
1054 void ReplaceWholeWord (const CString &string_to_replace,
1055 const CString &replacement,
1056 const bool trim_whitespace,
1057 CString *str) {
1058 ASSERT (str, (L"ReplaceWholeWord"));
1059
1060 if (string_to_replace.IsEmpty() || str->IsEmpty()) {
1061 return;
1062 }
1063
1064 int index_str = 0;
1065 do {
1066 index_str = FindWholeWordMatch (*str, string_to_replace, false, index_str);
1067
1068 if (-1 != index_str) {
1069 // Get the strings before and after, and trim whitespace.
1070 CString str_before_word(str->Left (index_str));
1071 if (trim_whitespace) {
1072 str_before_word.TrimRight();
1073 }
1074
1075 CString str_after_word(str->Mid (index_str + string_to_replace.GetLength() ));
1076 if (trim_whitespace) {
1077 str_after_word.TrimLeft();
1078 }
1079
1080 *str = str_before_word + replacement + str_after_word;
1081 index_str += replacement.GetLength() + 1;
1082 }
1083 } while (index_str != -1);
1084 }
1085
1086 // --------------------------------------------------------
1087 // Reverse (big-endian<->little-endian) the shorts that make up
1088 // Unicode characters in a byte array of Unicode chars
1089 HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes) {
1090 ASSERT (unicode_string, (L""));
1091
1092 // If odd # of bytes, just leave the last one alone
1093 for (int i = 0; i < size_in_bytes - 1; i += 2) {
1094 byte b = unicode_string[i];
1095 unicode_string[i] = unicode_string[i+1];
1096 unicode_string[i+1] = b;
1097 }
1098
1099 return S_OK;
1100 }
1101
1102 // case insensitive strstr
1103 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
1104 const char *stristr(const char *string, const char *pattern)
1105 {
1106 ASSERT (pattern, (L""));
1107 ASSERT (string, (L""));
1108 ASSERT (string && pattern, (L""));
1109 char *pattern_ptr, *string_ptr;
1110 const char *start;
1111
1112 for (start = string; *start != 0; start++)
1113 {
1114 // find start of pattern in string
1115 for ( ; ((*start!=0) && (String_ToUpperA(*start) != String_ToUpperA(*pattern ))); start++)
1116 ;
1117 if (0 == *start)
1118 return NULL;
1119
1120 pattern_ptr = (char *)pattern;
1121 string_ptr = (char *)start;
1122
1123 while (String_ToUpperA(*string_ptr) == String_ToUpperA(*pattern_ptr))
1124 {
1125 string_ptr++;
1126 pattern_ptr++;
1127
1128 // if end of pattern then pattern was found
1129 if (0 == *pattern_ptr)
1130 return (start);
1131 }
1132 }
1133
1134 return NULL;
1135 }
1136
1137 // case insensitive Unicode strstr
1138 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
1139 const WCHAR *stristrW(const WCHAR *string, const WCHAR *pattern)
1140 {
1141 ASSERT (pattern, (L""));
1142 ASSERT (string, (L""));
1143 ASSERT (string && pattern, (L""));
1144 const WCHAR *start;
1145
1146 for (start = string; *start != 0; start++)
1147 {
1148 // find start of pattern in string
1149 for ( ; ((*start!=0) && (String_ToUpper(*start) != String_ToUpper(*pattern)) ); start++)
1150 ;
1151 if (0 == *start)
1152 return NULL;
1153
1154 const WCHAR *pattern_ptr = pattern;
1155 const WCHAR *string_ptr = start;
1156
1157 while (String_ToUpper(*string_ptr) == String_ToUpper(*pattern_ptr))
1158 {
1159 string_ptr++;
1160 pattern_ptr++;
1161
1162 // if end of pattern then pattern was found
1163 if (0 == *pattern_ptr)
1164 return (start);
1165 }
1166 }
1167
1168 return NULL;
1169 }
1170
1171 // case sensitive Unicode strstr
1172 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c
1173 const WCHAR *strstrW(const WCHAR *string, const WCHAR *pattern)
1174 {
1175 ASSERT (pattern, (L""));
1176 ASSERT (string, (L""));
1177 ASSERT (string && pattern, (L""));
1178 const WCHAR *start;
1179
1180 for (start = string; *start != 0; start++)
1181 {
1182 // find start of pattern in string
1183 for ( ; ((*start!=0) && (*start != *pattern)); start++)
1184 ;
1185 if (0 == *start)
1186 return NULL;
1187
1188 const WCHAR *pattern_ptr = pattern;
1189 const WCHAR *string_ptr = start;
1190
1191 while (*string_ptr == *pattern_ptr)
1192 {
1193 string_ptr++;
1194 pattern_ptr++;
1195
1196 // if end of pattern then pattern was found
1197 if (0 == *pattern_ptr)
1198 return (start);
1199 }
1200 }
1201
1202 return NULL;
1203 }
1204
1205 // -------------------------------------------------------------------------
1206 // Helper function
1207 float GetLenWithWordWrap (const float len_so_far,
1208 const float len_to_add,
1209 const uint32 len_line) {
1210 // lint -save -e414 Possible division by 0
1211 ASSERT (len_line != 0, (L""));
1212
1213 float len_total = len_so_far + len_to_add;
1214
1215 // Figure out if we need to word wrap by seeing if adding the second
1216 // string will cause us to span more lines than before.
1217 uint32 num_lines_before = static_cast<uint32> (len_so_far / len_line);
1218 uint32 num_lines_after = static_cast<uint32> (len_total / len_line);
1219
1220 // If it just barely fit onto the line, do not wrap to the next line.
1221 if (num_lines_after > 0 && (len_total / len_line - num_lines_after == 0)) {
1222 --num_lines_after;
1223 }
1224
1225 if (num_lines_after > num_lines_before) {
1226 // Need to word wrap.
1227 // lint -e{790} Suspicious truncation
1228 return num_lines_after * len_line + len_to_add;
1229 }
1230 else
1231 return len_total;
1232
1233 // lint -restore
1234 }
1235
1236 int CalculateBase64EscapedLen(int input_len, bool do_padding) {
1237 // these formulae were copied from comments that used to go with the base64
1238 // encoding functions
1239 int intermediate_result = 8 * input_len + 5;
1240 ASSERT(intermediate_result > 0,(L"")); // make sure we didn't overflow
1241 int len = intermediate_result / 6;
1242 if (do_padding) len = ((len + 3) / 4) * 4;
1243 return len;
1244 }
1245
1246 // Base64Escape does padding, so this calculation includes padding.
1247 int CalculateBase64EscapedLen(int input_len) {
1248 return CalculateBase64EscapedLen(input_len, true);
1249 }
1250
1251 // Base64Escape
1252 // Largely based on b2a_base64 in google/docid_encryption.c
1253 //
1254 //
1255 int Base64EscapeInternal(const char *src, int szsrc,
1256 char *dest, int szdest, const char *base64,
1257 bool do_padding)
1258 {
1259 ASSERT(base64, (L""));
1260 ASSERT(dest, (L""));
1261 ASSERT(src, (L""));
1262
1263 static const char kPad64 = '=';
1264
1265 if (szsrc <= 0) return 0;
1266
1267 char *cur_dest = dest;
1268 const unsigned char *cur_src = reinterpret_cast<const unsigned char*>(src);
1269
1270 // Three bytes of data encodes to four characters of cyphertext.
1271 // So we can pump through three-byte chunks atomically.
1272 while (szsrc > 2) { /* keep going until we have less than 24 bits */
1273 if( (szdest -= 4) < 0 ) return 0;
1274 cur_dest[0] = base64[cur_src[0] >> 2];
1275 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
1276 cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)];
1277 cur_dest[3] = base64[cur_src[2] & 0x3f];
1278
1279 cur_dest += 4;
1280 cur_src += 3;
1281 szsrc -= 3;
1282 }
1283
1284 /* now deal with the tail (<=2 bytes) */
1285 switch (szsrc) {
1286 case 0:
1287 // Nothing left; nothing more to do.
1288 break;
1289 case 1:
1290 // One byte left: this encodes to two characters, and (optionally)
1291 // two pad characters to round out the four-character cypherblock.
1292 if( (szdest -= 2) < 0 ) return 0;
1293 cur_dest[0] = base64[cur_src[0] >> 2];
1294 cur_dest[1] = base64[(cur_src[0] & 0x03) << 4];
1295 cur_dest += 2;
1296 if (do_padding) {
1297 if( (szdest -= 2) < 0 ) return 0;
1298 cur_dest[0] = kPad64;
1299 cur_dest[1] = kPad64;
1300 cur_dest += 2;
1301 }
1302 break;
1303 case 2:
1304 // Two bytes left: this encodes to three characters, and (optionally)
1305 // one pad character to round out the four-character cypherblock.
1306 if( (szdest -= 3) < 0 ) return 0;
1307 cur_dest[0] = base64[cur_src[0] >> 2];
1308 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
1309 cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2];
1310 cur_dest += 3;
1311 if (do_padding) {
1312 if( (szdest -= 1) < 0 ) return 0;
1313 cur_dest[0] = kPad64;
1314 cur_dest += 1;
1315 }
1316 break;
1317 default:
1318 // Should not be reached: blocks of 3 bytes are handled
1319 // in the while loop before this switch statement.
1320 ASSERT(false, (L"Logic problem? szsrc = %S",szsrc));
1321 break;
1322 }
1323 return (cur_dest - dest);
1324 }
1325
// The standard Base64 alphabet: the character at index i encodes the 6-bit
// value i. It must contain exactly 64 characters and no whitespace.
#define kBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// The web-safe variant: same as above, with '-' and '_' for values 62 and 63.
#define kWebSafeBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
1329
1330 int Base64Escape(const char *src, int szsrc, char *dest, int szdest) {
1331 ASSERT(dest, (L""));
1332 ASSERT(src, (L""));
1333
1334 return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
1335 }
1336 int WebSafeBase64Escape(const char *src, int szsrc, char *dest,
1337 int szdest, bool do_padding) {
1338 ASSERT(dest, (L""));
1339 ASSERT(src, (L""));
1340
1341 return Base64EscapeInternal(src, szsrc, dest, szdest,
1342 kWebSafeBase64Chars, do_padding);
1343 }
1344
1345 void Base64Escape(const char *src, int szsrc,
1346 CStringA* dest, bool do_padding)
1347 {
1348 ASSERT(src, (L""));
1349 ASSERT(dest,(L""));
1350 const int max_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding);
1351 dest->Empty();
1352 const int escaped_len = Base64EscapeInternal(src, szsrc,
1353 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,
1354 kBase64Chars,
1355 do_padding);
1356 ASSERT(max_escaped_size <= escaped_len,(L""));
1357 dest->ReleaseBuffer(escaped_len);
1358 }
1359
1360 void WebSafeBase64Escape(const char *src, int szsrc,
1361 CStringA *dest, bool do_padding)
1362 {
1363 ASSERT(src, (L""));
1364 ASSERT(dest,(L""));
1365 const int max_escaped_size =
1366 CalculateBase64EscapedLen(szsrc, do_padding);
1367 dest->Empty();
1368 const int escaped_len = Base64EscapeInternal(src, szsrc,
1369 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,
1370 kWebSafeBase64Chars,
1371 do_padding);
1372 ASSERT(max_escaped_size <= escaped_len,(L""));
1373 dest->ReleaseBuffer(escaped_len);
1374 }
1375
1376 void WebSafeBase64Escape(const CStringA& src, CStringA* dest) {
1377 ASSERT(dest,(L""));
1378 int encoded_len = CalculateBase64EscapedLen(src.GetLength());
1379 scoped_array<char> buf(new char[encoded_len]);
1380 int len = WebSafeBase64Escape(src,src.GetLength(), buf.get(), encoded_len, fal se);
1381 dest->SetString(buf.get(), len);
1382 }
1383
1384 // ----------------------------------------------------------------------
1385 // int Base64Unescape() - base64 decoder
1386 //
1387 // Check out
1388 // http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for formal
1389 // description, but what we care about is that...
1390 // Take the encoded stuff in groups of 4 characters and turn each
1391 // character into a code 0 to 63 thus:
1392 // A-Z map to 0 to 25
1393 // a-z map to 26 to 51
1394 // 0-9 map to 52 to 61
1395 // +(- for WebSafe) maps to 62
1396 // /(_ for WebSafe) maps to 63
1397 // There will be four numbers, all less than 64 which can be represented
1398 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
1399 // Arrange the 6 digit binary numbers into three bytes as such:
1400 // aaaaaabb bbbbcccc ccdddddd
1401 // Equals signs (one or two) are used at the end of the encoded block to
1402 // indicate that the text was not an integer multiple of three bytes long.
1403 // ----------------------------------------------------------------------
1404 int Base64UnescapeInternal(const char *src, int len_src,
1405 char *dest, int len_dest, const char* unbase64) {
1406 ASSERT (unbase64, (L""));
1407 ASSERT (src, (L""));
1408
1409 static const char kPad64 = '=';
1410
1411 int decode;
1412 int destidx = 0;
1413 int state = 0;
1414 // Used an unsigned char, since ch is used as an array index (into unbase64).
1415 unsigned char ch = 0;
1416 while (len_src-- && (ch = *src++) != '\0') {
1417 if (IsSpaceA(ch)) // Skip whitespace
1418 continue;
1419
1420 if (ch == kPad64)
1421 break;
1422
1423 decode = unbase64[ch];
1424 if (decode == 99) // A non-base64 character
1425 return (-1);
1426
1427 // Four cyphertext characters decode to three bytes.
1428 // Therefore we can be in one of four states.
1429 switch (state) {
1430 case 0:
1431 // We're at the beginning of a four-character cyphertext block.
1432 // This sets the high six bits of the first byte of the
1433 // plaintext block.
1434 if (dest) {
1435 if (destidx >= len_dest)
1436 return (-1);
1437 // lint -e{734} Loss of precision
1438 dest[destidx] = static_cast<char>(decode << 2);
1439 }
1440 state = 1;
1441 break;
1442 case 1:
1443 // We're one character into a four-character cyphertext block.
1444 // This sets the low two bits of the first plaintext byte,
1445 // and the high four bits of the second plaintext byte.
1446 // However, if this is the end of data, and those four
1447 // bits are zero, it could be that those four bits are
1448 // leftovers from the encoding of data that had a length
1449 // of one mod three.
1450 if (dest) {
1451 if (destidx >= len_dest)
1452 return (-1);
1453 // lint -e{734} Loss of precision
1454 dest[destidx] |= decode >> 4;
1455 if (destidx + 1 >= len_dest) {
1456 if (0 != (decode & 0x0f))
1457 return (-1);
1458 else
1459 ;
1460 } else {
1461 // lint -e{734} Loss of precision
1462 dest[destidx+1] = static_cast<char>((decode & 0x0f) << 4);
1463 }
1464 }
1465 destidx++;
1466 state = 2;
1467 break;
1468 case 2:
1469 // We're two characters into a four-character cyphertext block.
1470 // This sets the low four bits of the second plaintext
1471 // byte, and the high two bits of the third plaintext byte.
1472 // However, if this is the end of data, and those two
1473 // bits are zero, it could be that those two bits are
1474 // leftovers from the encoding of data that had a length
1475 // of two mod three.
1476 if (dest) {
1477 if (destidx >= len_dest)
1478 return (-1);
1479 // lint -e{734} Loss of precision
1480 dest[destidx] |= decode >> 2;
1481 if (destidx +1 >= len_dest) {
1482 if (0 != (decode & 0x03))
1483 return (-1);
1484 else
1485 ;
1486 } else {
1487 // lint -e{734} Loss of precision
1488 dest[destidx+1] = static_cast<char>((decode & 0x03) << 6);
1489 }
1490 }
1491 destidx++;
1492 state = 3;
1493 break;
1494 case 3:
1495 // We're at the last character of a four-character cyphertext block.
1496 // This sets the low six bits of the third plaintext byte.
1497 if (dest) {
1498 if (destidx >= len_dest)
1499 return (-1);
1500 // lint -e{734} Loss of precision
1501 dest[destidx] |= decode;
1502 }
1503 destidx++;
1504 state = 0;
1505 break;
1506
1507 default:
1508 ASSERT (false, (L""));
1509 break;
1510 }
1511 }
1512
1513 // We are done decoding Base-64 chars. Let's see if we ended
1514 // on a byte boundary, and/or with erroneous trailing characters.
1515 if (ch == kPad64) { // We got a pad char
1516 if ((state == 0) || (state == 1))
1517 return (-1); // Invalid '=' in first or second position
1518 if (len_src == 0) {
1519 if (state == 2) // We run out of input but we still need another '='
1520 return (-1);
1521 // Otherwise, we are in state 3 and only need this '='
1522 } else {
1523 if (state == 2) { // need another '='
1524 while ((ch = *src++) != '\0' && (len_src-- > 0)) {
1525 if (!IsSpaceA(ch))
1526 break;
1527 }
1528 if (ch != kPad64)
1529 return (-1);
1530 }
1531 // state = 1 or 2, check if all remain padding is space
1532 while ((ch = *src++) != '\0' && (len_src-- > 0)) {
1533 if (!IsSpaceA(ch))
1534 return(-1);
1535 }
1536 }
1537 } else {
1538 // We ended by seeing the end of the string. Make sure we
1539 // have no partial bytes lying around. Note that we
1540 // do not require trailing '=', so states 2 and 3 are okay too.
1541 if (state == 1)
1542 return (-1);
1543 }
1544
1545 return (destidx);
1546 }
1547
1548 int Base64Unescape(const char *src, int len_src, char *dest, int len_dest) {
1549 ASSERT(dest, (L""));
1550 ASSERT(src, (L""));
1551
1552 static const char UnBase64[] = {
1553 99, 99, 99, 99, 99, 99, 99, 99,
1554 99, 99, 99, 99, 99, 99, 99, 99,
1555 99, 99, 99, 99, 99, 99, 99, 99,
1556 99, 99, 99, 99, 99, 99, 99, 99,
1557 99, 99, 99, 99, 99, 99, 99, 99,
1558 99, 99, 99, 62/*+*/, 99, 99, 99, 63/*/ */,
1559 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
1560 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99,
1561 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
1562 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
1563 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
1564 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 99,
1565 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
1566 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
1567 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
1568 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99,
1569 99, 99, 99, 99, 99, 99, 99, 99,
1570 99, 99, 99, 99, 99, 99, 99, 99,
1571 99, 99, 99, 99, 99, 99, 99, 99,
1572 99, 99, 99, 99, 99, 99, 99, 99,
1573 99, 99, 99, 99, 99, 99, 99, 99,
1574 99, 99, 99, 99, 99, 99, 99, 99,
1575 99, 99, 99, 99, 99, 99, 99, 99,
1576 99, 99, 99, 99, 99, 99, 99, 99,
1577 99, 99, 99, 99, 99, 99, 99, 99,
1578 99, 99, 99, 99, 99, 99, 99, 99,
1579 99, 99, 99, 99, 99, 99, 99, 99,
1580 99, 99, 99, 99, 99, 99, 99, 99,
1581 99, 99, 99, 99, 99, 99, 99, 99,
1582 99, 99, 99, 99, 99, 99, 99, 99,
1583 99, 99, 99, 99, 99, 99, 99, 99,
1584 99, 99, 99, 99, 99, 99, 99, 99
1585 };
1586
1587 // The above array was generated by the following code
1588 // #include <sys/time.h>
1589 // #include <stdlib.h>
1590 // #include <string.h>
1591 // main()
1592 // {
1593 // static const char Base64[] =
1594 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1595 // char *pos;
1596 // int idx, i, j;
1597 // printf(" ");
1598 // for (i = 0; i < 255; i += 8) {
1599 // for (j = i; j < i + 8; j++) {
1600 // pos = strchr(Base64, j);
1601 // if ((pos == NULL) || (j == 0))
1602 // idx = 99;
1603 // else
1604 // idx = pos - Base64;
1605 // if (idx == 99)
1606 // printf(" %2d, ", idx);
1607 // else
1608 // printf(" %2d/*%c*/,", idx, j);
1609 // }
1610 // printf("\n ");
1611 // }
1612 // }
1613
1614 return Base64UnescapeInternal(src, len_src, dest, len_dest, UnBase64);
1615 }
1616
1617 int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {
1618 ASSERT(dest, (L""));
1619 ASSERT(src, (L""));
1620
1621 static const char UnBase64[] = {
1622 99, 99, 99, 99, 99, 99, 99, 99,
1623 99, 99, 99, 99, 99, 99, 99, 99,
1624 99, 99, 99, 99, 99, 99, 99, 99,
1625 99, 99, 99, 99, 99, 99, 99, 99,
1626 99, 99, 99, 99, 99, 99, 99, 99,
1627 99, 99, 99, 99, 99, 62/*-*/, 99, 99,
1628 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
1629 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99,
1630 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
1631 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
1632 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
1633 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 63/*_*/,
1634 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
1635 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
1636 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
1637 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99,
1638 99, 99, 99, 99, 99, 99, 99, 99,
1639 99, 99, 99, 99, 99, 99, 99, 99,
1640 99, 99, 99, 99, 99, 99, 99, 99,
1641 99, 99, 99, 99, 99, 99, 99, 99,
1642 99, 99, 99, 99, 99, 99, 99, 99,
1643 99, 99, 99, 99, 99, 99, 99, 99,
1644 99, 99, 99, 99, 99, 99, 99, 99,
1645 99, 99, 99, 99, 99, 99, 99, 99,
1646 99, 99, 99, 99, 99, 99, 99, 99,
1647 99, 99, 99, 99, 99, 99, 99, 99,
1648 99, 99, 99, 99, 99, 99, 99, 99,
1649 99, 99, 99, 99, 99, 99, 99, 99,
1650 99, 99, 99, 99, 99, 99, 99, 99,
1651 99, 99, 99, 99, 99, 99, 99, 99,
1652 99, 99, 99, 99, 99, 99, 99, 99,
1653 99, 99, 99, 99, 99, 99, 99, 99
1654 };
1655 // The above array was generated by the following code
1656 // #include <sys/time.h>
1657 // #include <stdlib.h>
1658 // #include <string.h>
1659 // main()
1660 // {
1661 // static const char Base64[] =
1662 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
1663 // char *pos;
1664 // int idx, i, j;
1665 // printf(" ");
1666 // for (i = 0; i < 255; i += 8) {
1667 // for (j = i; j < i + 8; j++) {
1668 // pos = strchr(Base64, j);
1669 // if ((pos == NULL) || (j == 0))
1670 // idx = 99;
1671 // else
1672 // idx = pos - Base64;
1673 // if (idx == 99)
1674 // printf(" %2d, ", idx);
1675 // else
1676 // printf(" %2d/*%c*/,", idx, j);
1677 // }
1678 // printf("\n ");
1679 // }
1680 // }
1681
1682 return Base64UnescapeInternal(src, szsrc, dest, szdest, UnBase64);
1683 }
1684
1685 bool IsHexDigit (WCHAR c) {
1686 return (((c >= L'a') && (c <= L'f'))
1687 || ((c >= L'A') && (c <= L'F'))
1688 || ((c >= L'0') && (c <= L'9')));
1689 }
1690
1691 int HexDigitToInt (WCHAR c) {
1692 return ((c >= L'a') ? ((c - L'a') + 10) :
1693 (c >= L'A') ? ((c - L'A') + 10) :
1694 (c - L'0'));
1695 }
1696
1697 // ----------------------------------------------------------------------
1698 // int QuotedPrintableUnescape()
1699 //
1700 // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
1701 // more details, only briefly implemented. But from the web...
1702 // Quoted-printable is an encoding method defined in the MIME
1703 // standard. It is used primarily to encode 8-bit text (such as text
1704 // that includes foreign characters) into 7-bit US ASCII, creating a
1705 // document that is mostly readable by humans, even in its encoded
1706 // form. All MIME compliant applications can decode quoted-printable
1707 // text, though they may not necessarily be able to properly display the
1708 // document as it was originally intended. As quoted-printable encoding
1709 // is implemented most commonly, printable ASCII characters (values 33
1710 // through 126, excluding 61), tabs and spaces that do not appear at the
1711 // end of lines, and end-of-line characters are not encoded. Other
1712 // characters are represented by an equal sign (=) immediately followed
1713 // by that character's hexadecimal value. Lines that are longer than 76
1714 // characters are shortened by line breaks, with the equal sign marking
1715 // where the breaks occurred.
1716 //
1717 // Update: we really want QuotedPrintableUnescape to conform to rfc2047,
1718 // which expands the q encoding. In particular, it specifices that _'s are
1719 // to be treated as spaces.
1720 // ----------------------------------------------------------------------
1721 int QuotedPrintableUnescape(const WCHAR *source, int slen,
1722 WCHAR *dest, int len_dest) {
1723 ASSERT(dest, (L""));
1724 ASSERT(source, (L""));
1725
1726 WCHAR* d = dest;
1727 const WCHAR* p = source;
1728
1729 while (*p != '\0' && p < source+slen && d < dest+len_dest) {
1730 switch (*p) {
1731 case '=':
1732 if (p == source+slen-1) {
1733 // End of line, no need to print the =..
1734 return (d-dest);
1735 }
1736 // if its valid, convert to hex and insert
1737 if (p < source+slen-2 && IsHexDigit(p[1]) && IsHexDigit(p[2])) {
1738 // lint -e{734} Loss of precision
1739 *d++ = static_cast<WCHAR>(
1740 HexDigitToInt(p[1]) * 16 + HexDigitToInt(p[2]));
1741 p += 3;
1742 } else {
1743 p++;
1744 }
1745 break;
1746 case '_': // According to rfc2047, _'s are to be treated as spaces
1747 *d++ = ' '; p++;
1748 break;
1749 default:
1750 *d++ = *p++;
1751 break;
1752 }
1753 }
1754 return (d-dest);
1755 }
1756
1757 // TODO(omaha): currently set not to use IsCharUpper because that is relatively slow
1758 // this is used in the QUIB; consider if we need to use IsCharUpper or a replace ment
1759 bool String_IsUpper(TCHAR c) {
1760 return (c >= 'A' && c <= 'Z');
1761 // return (IsCharUpper (c));
1762 }
1763
1764 // Replacement for the CRT toupper(c)
1765 int String_ToUpper(int c) {
1766 // If it's < 128, then convert is ourself, which is far cheaper than the syste m conversion
1767 if (c < 128)
1768 return String_ToUpperA(static_cast<char>(c));
1769
1770 TCHAR * p_c = reinterpret_cast<TCHAR *>(c);
1771 int conv_c = reinterpret_cast<int>(::CharUpper(p_c));
1772 return conv_c;
1773 }
1774
// Replacement for the CRT toupper(c)
// Maps 'a'..'z' to 'A'..'Z' and returns every other character unchanged.
char String_ToUpperA(char c) {
  const bool is_lower_ascii = ('a' <= c) && (c <= 'z');
  return is_lower_ascii ? static_cast<char>(c - ('a' - 'A')) : c;
}
1780
1781 void String_ToLower(TCHAR* str) {
1782 ASSERT1(str);
1783 ::CharLower(str);
1784 }
1785
1786 void String_ToUpper(TCHAR* str) {
1787 ASSERT1(str);
1788 ::CharUpper(str);
1789 }
1790
1791 // String comparison based on length
1792 // Replacement for the CRT strncmp(i)
1793 int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool igno re_case) {
1794 ASSERT(str2, (L""));
1795 ASSERT(str1, (L""));
1796
1797 TCHAR c1, c2;
1798
1799 if (len == 0)
1800 return 0;
1801
1802 // compare each char
1803 // TODO(omaha): If we use a lot of case sensitive compares consider having 2 l oops.
1804 do {
1805 c1 = *str1++;
1806 c2 = *str2++;
1807 if (ignore_case) {
1808 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun cation
1809 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507
1810 }
1811 } while ( (--len) && c1 && (c1 == c2) );
1812
1813 return (int)(c1 - c2);
1814 }
1815
1816 // TODO(omaha): Why do we introduce this behaviorial difference?
1817 // Replacement for strncpy() - except ALWAYS ends string with null
1818 TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len) {
1819 ASSERT (source, (L""));
1820 ASSERT (destination, (L""));
1821
1822 TCHAR* result = destination;
1823
1824 ASSERT (0 != len, (L"")); // Too short a destination for even the null cha racter
1825
1826 while (*source && len) {
1827 *destination++ = *source++;
1828 len--;
1829 }
1830
1831 // If we ran out of space, back up one
1832 if (0 == len) {
1833 destination--;
1834 }
1835
1836 // Null-terminate the string
1837 *destination = _T('\0');
1838
1839 return result;
1840 }
1841
1842 // check if a string starts with another string
1843 bool String_StartsWith(const TCHAR *str, const TCHAR *start_str,
1844 bool ignore_case) {
1845 ASSERT(start_str, (L""));
1846 ASSERT(str, (L""));
1847
1848 while (0 != *str) {
1849 // Check for matching characters
1850 TCHAR c1 = *str;
1851 TCHAR c2 = *start_str;
1852
1853 // Reached the end of start_str?
1854 if (0 == c2)
1855 return true;
1856
1857 if (ignore_case) {
1858 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun cation
1859 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 Suspicious trun cation
1860 }
1861
1862 if (c1 != c2)
1863 return false;
1864
1865 ++str;
1866 ++start_str;
1867 }
1868
1869 // If str is shorter than start_str, no match. If equal size, match.
1870 return 0 == *start_str;
1871 }
1872
1873 // check if a string starts with another string
1874 bool String_StartsWithA(const char *str, const char *start_str, bool ignore_case ) {
1875 ASSERT(start_str, (L""));
1876 ASSERT(str, (L""));
1877
1878 while (0 != *str) {
1879 // Check for matching characters
1880 char c1 = *str;
1881 char c2 = *start_str;
1882
1883 // Reached the end of start_str?
1884 if (0 == c2)
1885 return true;
1886
1887 if (ignore_case) {
1888 c1 = String_ToLowerCharAnsi(c1);
1889 c2 = String_ToLowerCharAnsi(c2);
1890 }
1891
1892 if (c1 != c2)
1893 return false;
1894
1895 ++str;
1896 ++start_str;
1897 }
1898
1899 // If str is shorter than start_str, no match. If equal size, match.
1900 return 0 == *start_str;
1901 }
1902
1903 // the wrapper version below actually increased code size as of 5/31/04
1904 // perhaps because the int64 version is larger and in some EXE/DLLs we only need the int32 version
1905
1906 // converts a string to an int
1907 // Does not check for overflow
1908 // is the direct int32 version significantly faster for our usage?
1909 // int32 String_StringToInt(const TCHAR * str) {
1910 // ASSERT(str, (L""));
1911 // return static_cast<int32>(String_StringToInt64 (str));
1912 // }
1913
1914 // converts a string to an int
1915 // Does not check for overflow
1916 int32 String_StringToInt(const TCHAR * str) {
1917 ASSERT(str, (L""));
1918
1919 int c; // current char
1920 int32 total; // current total
1921 int sign; // if '-', then negative, otherwise positive
1922
1923 // remove spaces
1924 while ( *str == _T(' '))
1925 ++str;
1926
1927 c = (int)*str++;
1928 sign = c; // save sign indication
1929 if (c == _T('-') || c == _T('+'))
1930 c = (int)*str++; // skip sign
1931
1932 total = 0;
1933
1934 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1 ) {
1935 total = 10 * total + c; // accumulate digit
1936 c = *str++; // get next char
1937 }
1938
1939 if (sign == '-')
1940 return -total;
1941 else
1942 return total; // return result, negated if necessary
1943 }
1944
1945 // converts a string to an int64
1946 // Does not check for overflow
1947 int64 String_StringToInt64(const TCHAR * str) {
1948 ASSERT(str, (L""));
1949
1950 int c; // current char
1951 int64 total; // current total
1952 int sign;
1953
1954 while (*str == ' ') ++str; // skip space
1955
1956 c = (int)*str++;
1957 sign = c; /* save sign indication */
1958 if (c == '-' || c == '+')
1959 c = (int)*str++;
1960
1961 total = 0;
1962
1963 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1) {
1964 total = 10 * total + c; /* accumulate digit */
1965 c = *str++; /* get next char */
1966 }
1967
1968 if (sign == '-')
1969 return -total;
1970 else
1971 return total;
1972 }
1973
1974 // A faster version of the ::CharLower command. We first check if all characters are in low ANSI
1975 // If so, we can convert it ourselves [which is about 10x faster]
1976 // Otherwise, ask the system to do it for us.
1977 TCHAR * String_FastToLower(TCHAR * str) {
1978 ASSERT(str, (L""));
1979
1980 TCHAR * p = str;
1981 while (*p) {
1982 // If we can't process it ourselves, then do it with the API
1983 if (*p > 127)
1984 return ::CharLower(str);
1985 ++p;
1986 }
1987
1988 // If we're still here, do it ourselves
1989 p = str;
1990 while (*p) {
1991 // Lower case it
1992 if (*p >= L'A' && *p <= 'Z')
1993 *p |= 0x20;
1994 ++p;
1995 }
1996
1997 return str;
1998 }
1999
2000 // Convert a size_t to a CString
2001 CString sizet_to_str(const size_t & i) {
2002 CString out;
2003 out.Format(NOTRANSL(_T("%u")),i);
2004 return out;
2005 }
2006
2007 // Convert an int to a CString
2008 CString itostr(const int i) {
2009 return String_Int64ToString(i, 10);
2010 }
2011
2012 // Convert a uint to a CString
2013 CString itostr(const uint32 i) {
2014 return String_Int64ToString(i, 10);
2015 }
2016
2017 // converts an int to a string
2018 // Does not check for overflow
2019 CString String_Int64ToString(int64 value, int radix) {
2020 ASSERT(radix > 0, (L""));
2021
2022 // Space big enough for it in binary, plus the sign
2023 TCHAR temp[66];
2024
2025 bool negative = false;
2026 if (value < 0) {
2027 negative = true;
2028 value = -value;
2029 }
2030
2031 int pos = 0;
2032
2033 // Add digits in reverse order
2034 do {
2035 TCHAR digit = (TCHAR) (value % radix);
2036 if (digit > 9)
2037 temp[pos] = L'a' + digit - 10;
2038 else
2039 temp[pos] = L'0' + digit;
2040
2041 pos++;
2042 value /= radix;
2043 } while (value > 0);
2044
2045 if (negative)
2046 temp[pos++] = L'-';
2047
2048 // Reverse it before making a CString out of it
2049 int start = 0, end = pos - 1;
2050 while (start < end) {
2051 TCHAR t = temp[start];
2052 temp[start] = temp[end];
2053 temp[end] = t;
2054
2055 end--;
2056 start++;
2057 }
2058
2059 return CString(temp, pos);
2060 }
2061
2062 // converts an uint64 to a string
2063 // Does not check for overflow
2064 CString String_Uint64ToString(uint64 value, int radix) {
2065 ASSERT1(radix > 0);
2066
2067 CString ret;
2068
2069 const uint32 kMaxUint64Digits = 65;
2070
2071 // Space big enough for it in binary
2072 TCHAR* temp = ret.GetBufferSetLength(kMaxUint64Digits);
2073
2074 int pos = 0;
2075
2076 // Add digits in reverse order
2077 do {
2078 TCHAR digit = static_cast<TCHAR>(value % radix);
2079 if (digit > 9) {
2080 temp[pos] = _T('a') + digit - 10;
2081 } else {
2082 temp[pos] = _T('0') + digit;
2083 }
2084
2085 pos++;
2086 value /= radix;
2087 } while (value > 0 && pos < kMaxUint64Digits);
2088
2089 ret.ReleaseBuffer(pos);
2090
2091 // Reverse it before making a CString out of it
2092 ret.MakeReverse();
2093
2094 return ret;
2095 }
2096
2097 // converts an double to a string specifies the number of digits after
2098 // the decimal point
2099 CString String_DoubleToString(double value, int point_digits) {
2100 int64 int_val = (int64) value;
2101
2102 // Deal with integer part
2103 CString result(String_Int64ToString(int_val, 10));
2104
2105 if (point_digits > 0) {
2106 result.AppendChar(L'.');
2107
2108 // get the fp digits
2109 double rem_val = value - int_val;
2110 if (rem_val < 0)
2111 rem_val = -rem_val;
2112
2113 // multiply w/ the requested number of significant digits
2114 // construct the string in place
2115 for(int i=0; i<point_digits; i++) {
2116 // TODO(omaha): I have seen 1.2 turn into 1.1999999999999, and generate th at string.
2117 // We should round better. For now, I'll add a quick fix to favor high
2118 rem_val += 1e-12;
2119 rem_val *= 10;
2120 // Get the ones digit
2121 int64 int_rem_dig = std::min(10LL, static_cast<int64>(rem_val));
2122 result += static_cast<TCHAR>(int_rem_dig + L'0');
2123 rem_val = rem_val - int_rem_dig;
2124 }
2125 }
2126
2127 return result;
2128 }
2129
2130 double String_StringToDouble (const TCHAR *s) {
2131 ASSERT(s, (L""));
2132
2133 double value, power;
2134 int i = 0, sign;
2135
2136 while (IsSpaceW(s[i])) i++;
2137
2138 // get sign
2139 sign = (s[i] == '-') ? -1 : 1;
2140 if (s[i] == '+' || s[i] == '-') i++;
2141
2142 for (value = 0.0; s[i] >= '0' && s[i] <= '9'; i++)
2143 value = 10.0 * value + (s[i] - '0');
2144
2145 if (s[i] == '.') i++;
2146
2147 for (power = 1.0; s[i] >= '0' && s[i] <= '9'; i++) {
2148 value = 10.0 * value + (s[i] - '0');
2149 power *= 10.0;
2150 }
2151
2152 return sign * value / power;
2153 }
2154
2155 // Converts a character to a digit
2156 // if the character is not a digit return -1 (same as CRT)
2157 int32 String_CharToDigit(const TCHAR c) {
2158 return ((c) >= '0' && (c) <= '9' ? (c) - '0' : -1);
2159 }
2160
2161 bool String_IsDigit (const TCHAR c) {
2162 return ((c) >= '0' && (c) <= '9');
2163 }
2164
2165 TCHAR String_DigitToChar(unsigned int n) {
2166 ASSERT1(n < 10);
2167 return static_cast<TCHAR>(_T('0') + n % 10);
2168 }
2169
2170 // Returns true if an identifier character: letter, digit, or "_"
2171 bool String_IsIdentifierChar(const TCHAR c) {
2172 return ((c >= _T('A') && c <= _T('Z')) ||
2173 (c >= _T('a') && c <= _T('z')) ||
2174 (c >= _T('0') && c <= _T('9')) ||
2175 c == _T('_'));
2176 }
2177
2178 // Returns true if the string has letters in it.
2179 // This is used by the keyword extractor to downweight numbers,
2180 // IDs (sequences of numbers like social security numbers), etc.
2181 bool String_HasAlphabetLetters (const TCHAR * str) {
2182 ASSERT (str, (L""));
2183
2184 while (*str != '\0') {
2185 // if (iswalpha (*str)) {
2186 // Note that IsCharAlpha is slower but we want to avoid the CRT
2187 if (IsCharAlpha (*str)) {
2188 return true;
2189 }
2190 ++str;
2191 }
2192
2193 return false;
2194 }
2195
2196 CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* pow er) {
2197 uint32 to_one_decimal;
2198
2199 uint32 gig = base_ten ? 1000000000 : (1<<30);
2200 uint32 gig_div_10 = base_ten ? 100000000 : (1<<30)/10;
2201 uint32 meg = base_ten ? 1000000 : (1<<20);
2202 uint32 meg_div_10 = base_ten ? 100000 : (1<<20)/10;
2203 uint32 kilo = base_ten ? 1000 : (1<<10);
2204 uint32 kilo_div_10 = base_ten ? 100 : (1<<10)/10;
2205
2206 if (value >= gig) {
2207 if (power) *power = 3;
2208 to_one_decimal = static_cast<uint32>(value / gig_div_10);
2209 } else if (value >= meg) {
2210 if (power) *power = 2;
2211 to_one_decimal = static_cast<uint32>(value / meg_div_10);
2212 } else if (value >= kilo) {
2213 if (power) *power = 1;
2214 to_one_decimal = static_cast<uint32>(value / kilo_div_10);
2215 } else {
2216 if (power) *power = 0;
2217 return String_Int64ToString(static_cast<uint32>(value), 10 /*radix*/);
2218 }
2219
2220 uint32 whole_part = to_one_decimal / 10;
2221
2222 if (whole_part < 10)
2223 return Show(0.1 * static_cast<double>(to_one_decimal), 1);
2224
2225 return String_Int64ToString(whole_part, 10 /*radix*/);
2226 }
2227
2228 int String_FindString(const TCHAR *s1, const TCHAR *s2) {
2229 ASSERT(s2, (L""));
2230 ASSERT(s1, (L""));
2231
2232 // Naive implementation, but still oodles better than ATL's implementation
2233 // (which deals with variable character widths---we don't).
2234
2235 const TCHAR *found = _tcsstr(s1, s2);
2236 if (NULL == found)
2237 return -1;
2238
2239 return found - s1;
2240 }
2241
2242 int String_FindString(const TCHAR *s1, const TCHAR *s2, int start_pos) {
2243 ASSERT(s2, (L""));
2244 ASSERT(s1, (L""));
2245
2246 // Naive implementation, but still oodles better than ATL's implementation
2247 // (which deals with variable character widths---we don't).
2248
2249 int skip = start_pos;
2250
2251 const TCHAR *s = s1;
2252 while (skip && *s) {
2253 ++s;
2254 --skip;
2255 }
2256 if (!(*s))
2257 return -1;
2258
2259 const TCHAR *found = _tcsstr(s, s2);
2260 if (NULL == found)
2261 return -1;
2262
2263 return found - s1;
2264 }
2265
2266 int String_FindChar(const TCHAR *str, const TCHAR c) {
2267 ASSERT (str, (L""));
2268 const TCHAR *s = str;
2269 while (*s) {
2270 if (*s == c)
2271 return s - str;
2272 ++s;
2273 }
2274
2275 return -1;
2276 }
2277
2278 // taken from wcsrchr, modified to behave in the CString way
2279 int String_ReverseFindChar(const TCHAR * str,TCHAR c) {
2280 ASSERT (str, (L""));
2281 TCHAR *start = (TCHAR *)str;
2282
2283 while (*str++) /* find end of string */
2284 ;
2285 /* search towards front */
2286 while (--str != start && *str != (TCHAR)c)
2287 ;
2288
2289 if (*str == (TCHAR)c) /* found ? */
2290 return( str - start );
2291
2292 return -1;
2293 }
2294
2295 int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos) {
2296 ASSERT (str, (L""));
2297 int n = 0;
2298 const TCHAR *s = str;
2299 while (*s) {
2300 if (n++ >= start_pos && *s == c)
2301 return s - str;
2302 ++s;
2303 }
2304
2305 return -1;
2306 }
2307
2308 bool String_Contains(const TCHAR *s1, const TCHAR *s2) {
2309 ASSERT(s2, (L""));
2310 ASSERT(s1, (L""));
2311
2312 return -1 != String_FindString(s1, s2);
2313 }
2314
2315 void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char) {
2316 ASSERT (str, (L""));
2317 while (*str) {
2318 if (*str == old_char)
2319 *str = new_char;
2320
2321 ++str;
2322 }
2323 }
2324
2325 void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char) {
2326 String_ReplaceChar (str.GetBuffer(), old_char, new_char);
2327 str.ReleaseBuffer();
2328 }
2329
2330 int ReplaceCString (CString & src, const TCHAR *from, const TCHAR *to) {
2331 ASSERT(to, (L""));
2332 ASSERT(from, (L""));
2333
2334 return ReplaceCString(src, from, lstrlen(from), to, lstrlen(to), kRepMax);
2335 }
2336
2337 // A special version of the replace function which takes advantage of CString pr operties
2338 // to make it much faster when the string grows
2339 // 1) It will resize the string in place if possible. Even if it has to 'grow' t he string
2340 // 2) It will cutoff after a maximum number of matches
2341 // 3) It expects sizing data to be passed to it
2342 int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len,
2343 const TCHAR *to, unsigned int to_len,
2344 unsigned int max_matches) {
2345 ASSERT (from, (L""));
2346 ASSERT (to, (L""));
2347 ASSERT (from[0] != '\0', (L""));
2348 int i = 0, j = 0;
2349 unsigned int matches = 0;
2350
2351 // Keep track of the matches, it's easier than recalculating them
2352 unsigned int match_pos_stack[kExpectedMaxReplaceMatches];
2353
2354 // We might need to dynamically allocate space for the matches
2355 bool dynamic_allocate = false;
2356 unsigned int * match_pos = (unsigned int*)match_pos_stack;
2357 unsigned int max_match_size = kExpectedMaxReplaceMatches;
2358
2359 // Is the string getting bigger?
2360 bool longer = to_len > from_len;
2361
2362 // don't compute the lengths unless we know we need to
2363 int src_len = src.GetLength();
2364 int cur_len = src_len;
2365
2366 // Trick: We temporarily add 1 extra character to the string. The first char f rom the from
2367 // string. This way we can avoid searching for NULL, since we are guaranteed t o find it
2368 TCHAR * buffer = src.GetBufferSetLength(src_len+1);
2369 const TCHAR from_0 = from[0];
2370 buffer[src_len] = from[0];
2371
2372 while (i < cur_len) {
2373 // If we have too many matches, then re-allocate to a dynamic buffer that is
2374 // twice as big as the one we are currently using
2375 if (longer && (matches == max_match_size)) {
2376 // Double the buffer size, and copy it over
2377 unsigned int * temp = new unsigned int[max_match_size * 2];
2378 memcpy(temp, match_pos, matches * sizeof(unsigned int));
2379 if (dynamic_allocate)
2380 delete [] match_pos; // lint !e424 Inappropriate deallocation
2381 match_pos = temp;
2382
2383 max_match_size *= 2;
2384 dynamic_allocate = true;
2385 }
2386
2387 // If we have the maximum number of matches already, then stop
2388 if (matches >= max_matches) {
2389 break;
2390 }
2391
2392 // For each potential match
2393 // Note: oddly enough, this is the most expensive line in the function under normal usage. So I am optimizing the heck out of it
2394 TCHAR * buf_ptr = buffer + i;
2395 while (*buf_ptr != from_0) { ++buf_ptr; }
2396 i = buf_ptr - buffer;
2397
2398 // We're done!
2399 if (i >= cur_len)
2400 break;
2401
2402 // buffer is not NULL terminated, we replaced the NULL above
2403 while (i < cur_len && buffer[i] && buffer[i] == from[j]) {
2404 ++i; ++j;
2405 if (from[j] == '\0') { // found match
2406
2407 if (!longer) { // modify in place
2408
2409 memcpy ((byte *)(buffer+i) - (sizeof (TCHAR) * from_len), (byte *)to, sizeof (TCHAR) * to_len);
2410 // if there are often a lot of replacements, it would be faster to cre ate a new string instead
2411 // of using memmove
2412
2413 // TODO(omaha): - memmove will cause n^2 behavior in strings with mult iple matches since it will be moved many times...
2414 if (to_len < from_len) { memmove ((byte *)(buffer+i) - (sizeof (TCHAR) * (from_len - to_len)),
2415 (byte *)(buffer+i), (src_len - i + 1) * sizeof (TCHAR)); }
2416
2417 i -= (from_len - to_len);
2418 cur_len -= (from_len - to_len);
2419 }
2420 else
2421 match_pos[matches] = i - from_len;
2422
2423 ++matches;
2424
2425 break;
2426 }
2427 }
2428
2429 j = 0;
2430 }
2431
2432 if (to_len <= from_len)
2433 src_len -= matches * (from_len - to_len);
2434
2435 // if the new string is longer we do another pass now that we know how long th e new string needs to be
2436 if (matches && to_len > from_len) {
2437 src.ReleaseBuffer(src_len);
2438
2439 int new_len = src_len + matches * (to_len - from_len);
2440 buffer = src.GetBufferSetLength(new_len);
2441
2442 // It's easier to assemble it backwards...
2443 int temp_end = new_len;
2444 for(i = matches-1; i >= 0; --i) {
2445 // Figure out where the trailing portion isthe trailing portion
2446 int len = src_len - match_pos[i] - from_len;
2447 int start = match_pos[i] + from_len;
2448 int dest = temp_end - len;
2449 memmove(buffer+dest, buffer+start, (len) * sizeof(TCHAR));
2450
2451 // copy the new item
2452 memcpy(buffer + dest - to_len, to, to_len * sizeof(TCHAR));
2453
2454 // Update the pointers
2455 temp_end = dest - to_len;
2456 src_len = match_pos[i];
2457
2458 }
2459 src_len = new_len;
2460 }
2461
2462 src.ReleaseBuffer(src_len);
2463 if (dynamic_allocate)
2464 delete [] match_pos; // lint !e673 Possibly inappropriate deallocation
2465
2466 return matches;
2467 }
2468
2469 /*
2470 The following 2 functions will do replacement on TCHAR* directly. They is cur rently unused.
2471 Feel free to put it back if you need to.
2472 */
2473 int ReplaceString (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out, int *out_len) {
2474 ASSERT(out_len, (L""));
2475 ASSERT(out, (L""));
2476 ASSERT(to, (L""));
2477 ASSERT(from, (L""));
2478 ASSERT(src, (L""));
2479
2480 bool created_new_string;
2481 int matches = ReplaceStringMaybeInPlace (src, from, to, out, out_len, &created _new_string);
2482 if (!created_new_string) {
2483 *out = new TCHAR [(*out_len)+1];
2484 if (!(*out)) { *out = src; return 0; }
2485 _tcscpy_s(*out, *out_len + 1, src);
2486 }
2487
2488 return matches;
2489 }
2490
2491 int ReplaceStringMaybeInPlace (TCHAR *src, const TCHAR *from, const TCHAR *to, T CHAR **out, int *out_len, bool *created_new_string) {
2492 ASSERT (created_new_string, (L""));
2493 ASSERT (out_len, (L""));
2494 ASSERT (src, (L""));
2495 ASSERT (from, (L""));
2496 ASSERT (to, (L""));
2497 ASSERT (out, (L""));
2498 ASSERT (from[0] != '\0', (L""));
2499 int i = 0, j = 0;
2500 int matches = 0;
2501
2502 // don't compute the lengths unless we know we need to
2503 int from_len = -1, to_len = -1, src_len = -1;
2504
2505 *created_new_string = false;
2506 *out = src;
2507
2508 while (src[i]) {
2509 while (src[i] && src[i] != from[0]) { i++; }
2510 while (src[i] && src[i] == from[j]) {
2511 i++; j++;
2512 if (from[j] == '\0') { // found match
2513 if (from_len == -1) { // compute lengths if not known
2514 from_len = lstrlen (from);
2515 to_len = lstrlen (to);
2516 src_len = lstrlen (src);
2517 }
2518
2519 matches++;
2520
2521 if (to_len <= from_len) { // modify in place
2522 memcpy ((byte *)(src+i) - (sizeof (TCHAR) * from_len), (byte *)to, siz eof (TCHAR) * to_len);
2523 // if there are often a lot of replacements, it would be faster to cre ate a new string instead
2524 // of using memmove
2525 if (to_len < from_len) { memmove ((byte *)(src+i) - (sizeof (TCHAR) * (from_len - to_len)),
2526 (byte *)(src+i), (src_len - i + 1) * sizeof (TCHAR)); }
2527 i -= (from_len - to_len);
2528 }
2529
2530 break;
2531 }
2532 }
2533
2534 j = 0;
2535 }
2536
2537 *out_len = i;
2538
2539 // if the new string is longer we do another pass now that we know how long th e new string needs to be
2540 if (matches && to_len > from_len) {
2541 ASSERT (src_len == i, (L""));
2542 int new_len = src_len + matches * (to_len - from_len);
2543 *out = new TCHAR [new_len+1];
2544 if (!(*out)) { *out = src; *out_len = lstrlen (src); return 0; }
2545 *created_new_string = true;
2546 i = 0; j = 0; int k = 0;
2547
2548 while (src[i]) {
2549 while (src[i] && src[i] != from[0]) {
2550 (*out)[k++] = src[i++];
2551 }
2552 while (src[i] && src[i] == from[j]) {
2553 (*out)[k++] = src[i++];
2554 j++;
2555
2556 if (from[j] == '\0') { // found match
2557 k -= from_len;
2558 ASSERT (k >= 0, (L""));
2559 memcpy ((byte *)((*out)+k), (byte *)to, sizeof (TCHAR) * to_le n);
2560 k += to_len;
2561 break;
2562 }
2563 }
2564
2565 j = 0;
2566 }
2567
2568 (*out)[k] = '\0';
2569 ASSERT (k == new_len, (L""));
2570 *out_len = new_len;
2571 }
2572
2573 return matches;
2574 }
2575
2576 /****************************************************************************
 * wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed int.
2578 *
2579 * modified from:
2580 *
2581 * wcstol.c - Contains C runtimes wcstol and wcstoul
2582 *
2583 * Copyright (c) Microsoft Corporation. All rights reserved.
2584 *
2585 * Purpose:
 *       Convert an ascii string to a long 32-bit value.  The base
 *       used for the calculations is supplied by the caller.  The base
2588 * must be in the range 0, 2-36. If a base of 0 is supplied, the
2589 * ascii string must be examined to determine the base of the
2590 * number:
2591 * (a) First char = '0', second char = 'x' or 'X',
2592 * use base 16.
2593 * (b) First char = '0', use base 8
2594 * (c) First char in range '1' - '9', use base 10.
2595 *
2596 * If the 'endptr' value is non-NULL, then wcstol/wcstoul places
2597 * a pointer to the terminating character in this value.
2598 * See ANSI standard for details
2599 *
2600 *Entry:
2601 * nptr == NEAR/FAR pointer to the start of string.
2602 * endptr == NEAR/FAR pointer to the end of the string.
2603 * ibase == integer base to use for the calculations.
2604 *
2605 * string format: [whitespace] [sign] [0] [x] [digits/letters]
2606 *
2607 *Exit:
2608 * Good return:
2609 * result
2610 *
2611 * Overflow return:
2612 * wcstol -- LONG_MAX or LONG_MIN
2613 * wcstoul -- ULONG_MAX
2614 * wcstol/wcstoul -- errno == ERANGE
2615 *
2616 * No digits or bad base return:
2617 * 0
2618 * endptr = nptr*
2619 *
2620 *Exceptions:
2621 * None.
2622 *
2623 *******************************************************************************/
2624
2625 // flag values */
2626 #define kFlUnsigned (1) // wcstoul called */
2627 #define kFlNeg (2) // negative sign found */
2628 #define kFlOverflow (4) // overflow occured */
2629 #define kFlReaddigit (8) // we've read at least one correct digit */
2630
2631 static unsigned long __cdecl wcstoxl (const wchar_t *nptr, wchar_t **endptr, int ibase, int flags) {
2632 ASSERT(nptr, (L""));
2633
2634 const wchar_t *p;
2635 wchar_t c;
2636 unsigned long number;
2637 unsigned digval;
2638 unsigned long maxval;
2639 // #ifdef _MT
2640 // pthreadlocinfo ptloci = _getptd()->ptlocinfo;
2641
2642 // if ( ptloci != __ptlocinfo )
2643 // ptloci = __updatetlocinfo();
2644 // #endif // _MT */
2645
2646 p = nptr; // p is our scanning pointer */
2647 number = 0; // start with zero */
2648
2649 c = *p++; // read char */
2650
2651 // #ifdef _MT
2652 // while ( __iswspace_mt(ptloci, c) )
2653 // #else // _MT */
2654 while (c == ' ')
2655 // while ( iswspace(c) )
2656 // #endif // _MT */
2657 c = *p++; // skip whitespace */
2658
2659 if (c == '-') {
2660 flags |= kFlNeg; // remember minus sign */
2661 c = *p++;
2662 }
2663 else if (c == '+')
2664 c = *p++; // skip sign */
2665
2666 if (ibase < 0 || ibase == 1 || ibase > 36) {
2667 // bad base! */
2668 if (endptr)
2669 // store beginning of string in endptr */
2670 *endptr = const_cast<wchar_t *>(nptr);
2671 return 0L; // return 0 */
2672 }
2673 else if (ibase == 0) {
2674 // determine base free-lance, based on first two chars of
2675 // string */
2676 if (String_CharToDigit(c) != 0)
2677 ibase = 10;
2678 else if (*p == L'x' || *p == L'X')
2679 ibase = 16;
2680 else
2681 ibase = 8;
2682 }
2683
2684 if (ibase == 16) {
2685 // we might have 0x in front of number; remove if there */
2686 if (String_CharToDigit(c) == 0 && (*p == L'x' || *p == L'X')) {
2687 ++p;
2688 c = *p++; // advance past prefix */
2689 }
2690 }
2691
2692 // if our number exceeds this, we will overflow on multiply */
2693 maxval = ULONG_MAX / ibase;
2694
2695 for (;;) { // exit in middle of loop */
2696
2697 // convert c to value */
2698 if ( (digval = String_CharToDigit(c)) != (unsigned) -1 )
2699 ;
2700 else if (c >= 'A' && c <= 'F') { digval = c - 'A' + 10; }
2701 else if (c >= 'a' && c <= 'f') { digval = c - 'a' + 10; }
2702 // else if ( __ascii_iswalpha(c))
2703 // digval = __ascii_towupper(c) - L'A' + 10;
2704 else
2705 break;
2706
2707 if (digval >= (unsigned)ibase)
2708 break; // exit loop if bad digit found */
2709
2710 // record the fact we have read one digit */
2711 flags |= kFlReaddigit;
2712
2713 // we now need to compute number = number * base + digval,
2714 // but we need to know if overflow occured. This requires
2715 // a tricky pre-check. */
2716
2717 if (number < maxval || (number == maxval &&
2718 (unsigned long)digval <= ULONG_MAX % ibase)) {
2719 // we won't overflow, go ahead and multiply */
2720 number = number * ibase + digval;
2721 }
2722 else {
2723 // we would have overflowed -- set the overflow flag */
2724 flags |= kFlOverflow;
2725 }
2726
2727 c = *p++; // read next digit */
2728 }
2729
2730 --p; // point to place that stopped scan */
2731
2732 if (!(flags & kFlReaddigit)) {
2733 // no number there; return 0 and point to beginning of string */
2734 if (endptr)
2735 // store beginning of string in endptr later on */
2736 p = nptr;
2737 number = 0L; // return 0 */
2738 }
2739 // lint -save -e648 -e650 Overflow in -LONG_MIN
2740 #pragma warning(push)
2741 // C4287 : unsigned/negative constant mismatch.
2742 // The offending expression is number > -LONG_MIN. -LONG_MIN overflows and
2743 // technically -LONG_MIN == LONG_MIN == 0x80000000. It should actually
2744 // result in a compiler warning, such as C4307: integral constant overflow.
2745 // Anyway, in the expression (number > -LONG_MIN) the right operand is converted
2746 // to unsigned long, so the expression is actually evaluated as
2747 // number > 0x80000000UL. The code is probably correct but subtle, to say the
2748 // least.
2749 #pragma warning(disable : 4287)
2750 else if ( (flags & kFlOverflow) ||
2751 ( !(flags & kFlUnsigned) &&
2752 ( ( (flags & kFlNeg) && (number > -LONG_MIN) ) ||
2753 ( !(flags & kFlNeg) && (number > LONG_MAX) ) ) ) )
2754 {
2755 // overflow or signed overflow occurred */
2756 // errno = ERANGE;
2757 if ( flags & kFlUnsigned )
2758 number = ULONG_MAX;
2759 else if ( flags & kFlNeg )
2760 // lint -e{648, 650} Overflow in -LONG_MIN
2761 number = (unsigned long)(-LONG_MIN);
2762 else
2763 number = LONG_MAX;
2764 }
2765 #pragma warning(pop)
2766 // lint -restore
2767
2768 if (endptr != NULL)
2769 // store pointer to char that stopped the scan */
2770 *endptr = const_cast<wchar_t *>(p);
2771
2772 if (flags & kFlNeg)
2773 // negate result if there was a neg sign */
2774 number = (unsigned long)(-(long)number);
2775
2776 return number; // done. */
2777 }
2778
2779 long __cdecl Wcstol (const wchar_t *nptr, wchar_t **endptr, int ibase) {
2780 ASSERT(endptr, (L""));
2781 ASSERT(nptr, (L""));
2782
2783 return (long) wcstoxl(nptr, endptr, ibase, 0);
2784 }
2785
2786 unsigned long __cdecl Wcstoul (const wchar_t *nptr, wchar_t **endptr, int ibase) {
2787 // endptr may be NULL
2788 ASSERT(nptr, (L""));
2789
2790 return wcstoxl(nptr, endptr, ibase, kFlUnsigned);
2791 }
2792
2793 // Functions on arrays of strings
2794
2795 // Returns true iff s is in the array strings (case-insensitive compare)
2796 bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s) {
2797 ASSERT(s, (L""));
2798 // strings may be NULL
2799
2800 const int s_length = lstrlen(s);
2801 if (strings == NULL)
2802 return false;
2803 for (; *strings != NULL; strings++) {
2804 if (0 == String_StrNCmp(*strings, s, s_length, true)) {
2805 return true; // Found equal string
2806 }
2807 }
2808 return false;
2809 }
2810
2811 // Returns index of s in the array of strings (or -1 for missing) (case-insensit ive compare)
2812 int String_IndexOf(const TCHAR* const* strings, const TCHAR* s) {
2813 ASSERT(s, (L""));
2814 // strings may be NULL
2815
2816 const int s_length = lstrlen(s);
2817 if (strings == NULL)
2818 return -1;
2819 for (int i = 0; *strings != NULL; i++, strings++) {
2820 if (0 == String_StrNCmp(*strings, s, s_length, true)) {
2821 return i; // Found equal string
2822 }
2823 }
2824 return -1;
2825 }
2826
2827 // The internal format is a int64.
2828 time64 StringToTime(const CString & time) {
2829 return static_cast<time64>(String_StringToInt64(time));
2830 }
2831
2832 // See above comment from StringToTime.
2833 // Just show it as a INT64 for now
2834 // NOTE: this will truncating it to INT64, which may lop off some times in the f uture
2835 CString TimeToString(const time64 & time) {
2836 return String_Int64ToString(static_cast<int64>(time), 10);
2837 }
2838
2839 const TCHAR *FindStringASpaceStringB (const TCHAR *s, const TCHAR *a, const TCHA R *b) {
2840 ASSERT(s, (L""));
2841 ASSERT(a, (L""));
2842 ASSERT(b, (L""));
2843
2844 const TCHAR *search_from = s;
2845 const TCHAR *pos;
2846 while (*search_from && (pos = stristrW (search_from, a)) != NULL) {
2847 const TCHAR *start = pos;
2848 pos += lstrlen(a);
2849 search_from = pos;
2850 while (*pos == ' ' || *pos == '\t') pos++;
2851 if (!String_StrNCmp (pos, b, lstrlen(b), true)) return start;
2852 }
2853
2854 return 0;
2855 }
2856
// Returns true when |c| is an ASCII letter ('a'..'z' or 'A'..'Z').
bool IsAlphaA (const char c) {
  const bool is_lower = (c >= 'a' && c <= 'z');
  const bool is_upper = (c >= 'A' && c <= 'Z');
  return is_lower || is_upper;
}
2860
// Returns true when |c| is one of the ASCII digits '0'..'9'.
bool IsDigitA (const char c) {
  const bool outside_range = (c < '0') || (c > '9');
  return !outside_range;
}
2864
2865 void SafeStrCat (TCHAR *dest, const TCHAR *src, int dest_buffer_len) {
2866 _tcscat_s(dest, dest_buffer_len, src);
2867 }
2868
2869 // extracts next float in a string
2870 // skips any non-digit characters
2871 // return position after end of float
2872 const TCHAR *ExtractNextDouble (const TCHAR *s, double *f) {
2873 ASSERT (f, (L""));
2874 ASSERT (s, (L""));
2875
2876 CString num;
2877 while (*s && !String_IsDigit (*s)) s++;
2878 while (*s && (*s == '.' || String_IsDigit (*s))) { num += *s; s++; }
2879 ASSERT (num.GetLength(), (L""));
2880 *f = String_StringToDouble (num);
2881 return s;
2882 }
2883
2884 TCHAR *String_PathFindExtension(const TCHAR *path) {
2885 ASSERT(path, (L""));
2886
2887 // Documentation says PathFindExtension string must be of max length
2888 // MAX_PATH but a trusted tester hit the ASSERT and we don't really
2889 // need it here, so commented out. We can't address where it is
2890 // called because it's called from ATL code.
2891 // ASSERT(lstrlen(path)<=MAX_PATH, (L""));
2892
2893 // point to terminating NULL
2894 const TCHAR *ret = path + lstrlen(path);
2895 const TCHAR *pos = ret;
2896
2897 while (--pos >= path) {
2898 if (*pos == '.')
2899 return const_cast<TCHAR *>(pos);
2900 }
2901
2902 return const_cast<TCHAR *>(ret);
2903 }
2904
// Lower-cases an ASCII uppercase letter; every other character is returned
// unchanged.
char String_ToLowerCharAnsi(char c) {
  const bool is_upper = !(c < 'A' || c > 'Z');
  if (!is_upper) {
    return c;
  }
  return (c + ('a' - 'A'));
}
2909
2910 int String_ToLowerChar(int c) {
2911 // If it's < 128, then convert is ourself, which is far cheaper than the syste m conversion
2912 if (c < 128)
2913 return String_ToLowerCharAnsi(static_cast<char>(c));
2914
2915 return Char_ToLower(static_cast<TCHAR>(c));
2916 }
2917
2918
2919 bool String_PathRemoveFileSpec(TCHAR *path) {
2920 ASSERT (path, (L""));
2921
2922 int len, pos;
2923 len = pos = lstrlen (path);
2924
2925 // You might think that the SHLWAPI API does not change "c:\windows" -> "c:\"
2926 // when c:\windows is a directory, but it does.
2927
2928 // If we don't want to match this weird API we can use the following to check
2929 // for directories:
2930
2931 // Check if we are already a directory.
2932 WIN32_FILE_ATTRIBUTE_DATA attrs;
2933 // Failure (if file does not exist) is OK.
2934 BOOL success = GetFileAttributesEx(path, GetFileExInfoStandard, &attrs);
2935 UTIL_LOG(L4, (_T("[String_PathRemoveFileSpec][path %s][success %d][dir %d]"),
2936 path,
2937 success,
2938 attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY));
2939 if (success && (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
2940 // Remove trailing backslash, if any.
2941 if (path[pos-1] == '\\')
2942 path[pos-1] = '\0';
2943 return 1;
2944 }
2945
2946 // Find last backslash.
2947 while (pos && path[pos] != '\\') pos--;
2948 if (!pos && path[pos] != '\\') return 0;
2949
2950 ASSERT (pos < len, (L""));
2951
2952 // The documentation says it removes backslash but it doesn't for c:\.
2953 if (!pos || path[pos-1] == ':' || (pos == 1 && path[0] == '\\'))
2954 // Keep the backslash in this case.
2955 path[pos+1] = '\0';
2956 else
2957 path[pos] = '\0';
2958
2959 return 1;
2960 }
2961
2962 void String_EndWithChar(TCHAR *str, TCHAR c) {
2963 ASSERT (str, (L""));
2964 int len = lstrlen(str);
2965 if (len == 0 || str[len - 1] != c) {
2966 str[len] = c;
2967 str[len + 1] = 0;
2968 }
2969 }
2970
2971 bool StartsWithBOM(const TCHAR* string) {
2972 ASSERT(string, (L""));
2973 wchar_t c = string[0];
2974 if (c == 0xFFFE || c == 0xFEFF)
2975 return true;
2976 else
2977 return false;
2978 }
2979
2980 const TCHAR* StringAfterBOM(const TCHAR* string) {
2981 ASSERT(string, (L""));
2982 return &string[StartsWithBOM(string) ? 1 : 0];
2983 }
2984
2985 bool String_StringToDecimalIntChecked(const TCHAR* str, int* value) {
2986 ASSERT1(str);
2987 ASSERT1(value);
2988
2989 if (_set_errno(0)) {
2990 return false;
2991 }
2992
2993 TCHAR* end_ptr = NULL;
2994 *value = _tcstol(str, &end_ptr, 10);
2995 ASSERT1(end_ptr);
2996
2997 if (errno) {
2998 ASSERT1(ERANGE == errno);
2999 // Overflow or underflow.
3000 return false;
3001 } else if (*value == 0) {
3002 // The value returned could be an error code. tcsltol returns
3003 // zero when it cannot convert the string. However we need to
3004 // distinguish a real zero. Thus check to see if end_ptr is not the start
3005 // of the string (str is not an empty string) and is pointing to a '\0'.
3006 // If not, we have an error.
3007 if ((str == end_ptr) || (*end_ptr != '\0')) {
3008 return false;
3009 }
3010 } else if (*end_ptr != '\0') {
3011 // The end_ptr is pointing at a character that is
3012 // not the end of the string. Only part of the string could be converted.
3013 return false;
3014 }
3015
3016 return true;
3017 }
3018
3019 bool CLSIDToCString(const GUID& guid, CString* str) {
3020 ASSERT(str, (L""));
3021
3022 LPOLESTR string_guid = NULL;
3023 if (::StringFromCLSID(guid, &string_guid) != S_OK) {
3024 return false;
3025 }
3026 *str = string_guid;
3027 ::CoTaskMemFree(string_guid);
3028
3029 return true;
3030 }
3031
3032 HRESULT String_StringToBool(const TCHAR* str, bool* value) {
3033 ASSERT1(str);
3034 ASSERT1(value);
3035
3036 // This method now performs a case-insentitive
3037 // culture aware compare. We should however be ok as we are only comparing
3038 // latin characters.
3039 if (_tcsicmp(kFalse, str) == 0) {
3040 *value = false;
3041 } else if (_tcsicmp(kTrue, str) == 0) {
3042 *value = true;
3043 } else {
3044 // we found another string. should error out.
3045 return E_FAIL;
3046 }
3047 return S_OK;
3048 }
3049
3050 HRESULT String_BoolToString(bool value, CString* string) {
3051 ASSERT1(string);
3052 *string = value ? kTrue : kFalse;
3053 return S_OK;
3054 }
3055
3056 CString String_ReplaceIgnoreCase(const CString& string,
3057 const CString& token,
3058 const CString& replacement) {
3059 int token_length = token.GetLength();
3060 if (!token_length) {
3061 return string;
3062 }
3063
3064 CString string_lowercase(string);
3065 CString token_lowercase(token);
3066 string_lowercase.MakeLower();
3067 token_lowercase.MakeLower();
3068
3069 CString output(string);
3070 int replacement_length = replacement.GetLength();
3071
3072 int index = 0;
3073 int output_index = 0;
3074
3075 for (int new_index = 0;
3076 (new_index = string_lowercase.Find(token_lowercase, index)) != -1;
3077 index = new_index + token_length) {
3078 output_index += new_index - index;
3079 output.Delete(output_index, token_length);
3080 output.Insert(output_index, replacement);
3081 output_index += replacement_length;
3082 }
3083
3084 return output;
3085 }
3086
3087 // Escape and unescape strings (shlwapi-based implementation).
3088 // The intended usage for these APIs is escaping strings to make up
3089 // URLs, for example building query strings.
3090 //
3091 // Pass false to the flag segment_only to escape the url. This will not
3092 // cause the conversion of the # (%23), ? (%3F), and / (%2F) characters.
3093
3094 // Characters that must be encoded include any characters that have no
3095 // corresponding graphic character in the US-ASCII coded character
3096 // set (hexadecimal 80-FF, which are not used in the US-ASCII coded character
3097 // set, and hexadecimal 00-1F and 7F, which are control characters),
3098 // blank spaces, "%" (which is used to encode other characters),
3099 // and unsafe characters (<, >, ", #, {, }, |, \, ^, ~, [, ], and ').
3100 //
3101 // The input and output strings can't be longer than INTERNET_MAX_URL_LENGTH
3102
3103 HRESULT StringEscape(const CString& str_in,
3104 bool segment_only,
3105 CString* str_out) {
3106 ASSERT1(str_out);
3107 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);
3108
3109 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;
3110 HRESULT hr = ::UrlEscape(str_in, str_out->GetBufferSetLength(buf_len), &buf_le n,
3111 segment_only ? URL_ESCAPE_PERCENT | URL_ESCAPE_SEGMENT_ONLY : URL_ESCAPE_PER CENT);
3112 if (SUCCEEDED(hr)) {
3113 str_out->ReleaseBuffer();
3114 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
3115 }
3116 return hr;
3117 }
3118
3119 HRESULT StringUnescape(const CString& str_in, CString* str_out) {
3120 ASSERT1(str_out);
3121 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);
3122
3123 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;
3124 HRESULT hr = ::UrlUnescape(const_cast<TCHAR*>(str_in.GetString()),
3125 str_out->GetBufferSetLength(buf_len), &buf_len, 0);
3126 if (SUCCEEDED(hr)) {
3127 str_out->ReleaseBuffer(buf_len + 1);
3128 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
3129 }
3130 return hr;
3131 }
3132
3133 bool String_StringToTristate(const TCHAR* str, Tristate* value) {
3134 ASSERT1(str);
3135 ASSERT1(value);
3136
3137 int numerical_value = 0;
3138 if (!String_StringToDecimalIntChecked(str, &numerical_value)) {
3139 return false;
3140 }
3141
3142 switch (numerical_value) {
3143 case 0:
3144 *value = TRISTATE_FALSE;
3145 break;
3146 case 1:
3147 *value = TRISTATE_TRUE;
3148 break;
3149 case 2:
3150 *value = TRISTATE_NONE;
3151 break;
3152 default:
3153 return false;
3154 }
3155
3156 return true;
3157 }
3158
3159 // Extracts the name and value from a string that contains a name/value pair.
3160 bool ParseNameValuePair(const CString& token,
3161 TCHAR separator,
3162 CString* name,
3163 CString* value) {
3164 ASSERT1(name);
3165 ASSERT1(value);
3166
3167 int separator_index = token.Find(separator);
3168 if ((separator_index == -1) || // Not a name-value pair.
3169 (separator_index == 0) || // No name was supplied.
3170 (separator_index == (token.GetLength() - 1))) { // No value was supplied.
3171 return false;
3172 }
3173
3174 *name = token.Left(separator_index);
3175 *value = token.Right(token.GetLength() - separator_index - 1);
3176
3177 ASSERT1(token.GetLength() == name->GetLength() + value->GetLength() + 1);
3178
3179 // It's not possible for the name to contain the separator.
3180 ASSERT1(-1 == name->Find(separator));
3181 if (-1 != value->Find(separator)) {
3182 // The value contains the separator.
3183 return false;
3184 }
3185
3186 return true;
3187 }
3188
3189 bool SplitCommandLineInPlace(TCHAR *command_line,
3190 TCHAR **first_argument_parameter,
3191 TCHAR **remaining_arguments_parameter) {
3192 if (!command_line ||
3193 !first_argument_parameter ||
3194 !remaining_arguments_parameter) {
3195 return false;
3196 }
3197
3198 TCHAR end_char;
3199 TCHAR *&first_argument = *first_argument_parameter;
3200 TCHAR *&remaining_arguments = *remaining_arguments_parameter;
3201 if (_T('\"') == *command_line) {
3202 end_char = _T('\"');
3203 first_argument = remaining_arguments = command_line + 1;
3204 } else {
3205 end_char = _T(' ');
3206 first_argument = remaining_arguments = command_line;
3207 }
3208 // Search for the end of the first argument
3209 while (end_char != *remaining_arguments && '\0' != *remaining_arguments) {
3210 ++remaining_arguments;
3211 }
3212 if (end_char == *remaining_arguments) {
3213 *remaining_arguments = '\0';
3214 do {
3215 // Skip the spaces between the first argument and the remaining arguments.
3216 ++remaining_arguments;
3217 } while (_T(' ') == *remaining_arguments);
3218 }
3219 return true;
3220 }
3221
3222 bool ContainsOnlyAsciiChars(const CString& str) {
3223 for (int i = 0; i < str.GetLength(); ++i) {
3224 if (str[i] > 0x7F) {
3225 return false;
3226 }
3227 }
3228 return true;
3229 }
3230 CString BytesToHex(const uint8* bytes, size_t num_bytes) {
3231 CString result;
3232 if (bytes) {
3233 result.Preallocate(num_bytes * sizeof(TCHAR));
3234 static const TCHAR* const kHexChars = _T("0123456789abcdef");
3235 for (size_t i = 0; i != num_bytes; ++i) {
3236 result.AppendChar(kHexChars[(bytes[i] >> 4)]);
3237 result.AppendChar(kHexChars[(bytes[i] & 0xf)]);
3238 }
3239 }
3240 return result;
3241 }
3242
3243 CString BytesToHex(const std::vector<uint8>& bytes) {
3244 CString result;
3245 if (!bytes.empty()) {
3246 result.SetString(BytesToHex(&bytes.front(), bytes.size()));
3247 }
3248 return result;
3249 }
3250
3251 void JoinStrings(const std::vector<CString>& components,
3252 const TCHAR* delim,
3253 CString* result) {
3254 ASSERT1(result);
3255 result->Empty();
3256
3257 // Compute length so we can reserve memory.
3258 size_t length = 0;
3259 size_t delim_length = delim ? _tcslen(delim) : 0;
3260 for (size_t i = 0; i != components.size(); ++i) {
3261 if (i != 0) {
3262 length += delim_length;
3263 }
3264 length += components[i].GetLength();
3265 }
3266
3267 result->Preallocate(length);
3268
3269 for (size_t i = 0; i != components.size(); ++i) {
3270 if (i != 0 && delim) {
3271 result->Append(delim, delim_length);
3272 }
3273 result->Append(components[i]);
3274 }
3275 }
3276
3277 void JoinStringsInArray(const TCHAR* components[],
3278 int num_components,
3279 const TCHAR* delim,
3280 CString* result) {
3281 ASSERT1(result);
3282 result->Empty();
3283
3284 for (int i = 0; i != num_components; ++i) {
3285 if (i != 0 && delim) {
3286 result->Append(delim);
3287 }
3288 if (components[i]) {
3289 result->Append(components[i]);
3290 }
3291 }
3292 }
3293
3294 CString FormatResourceMessage(uint32 resource_id, ...) {
3295 CString format;
3296 const bool is_loaded = !!format.LoadString(resource_id);
3297
3298 if (!is_loaded) {
3299 return CString();
3300 }
3301
3302 va_list arg_list;
3303 va_start(arg_list, resource_id);
3304
3305 CString formatted;
3306 formatted.FormatMessageV(format, &arg_list);
3307
3308 va_end(arg_list);
3309
3310 return formatted;
3311 }
3312
3313 CString FormatErrorCode(DWORD error_code) {
3314 CString error_code_string;
3315 if (FAILED(error_code)) {
3316 error_code_string.Format(_T("0x%08x"), error_code);
3317 } else {
3318 error_code_string.Format(_T("%u"), error_code);
3319 }
3320 return error_code_string;
3321 }
3322
3323 HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out) {
3324 ASSERT1(out);
3325
3326 out->Empty();
3327 if (str.IsEmpty()) {
3328 return S_OK;
3329 }
3330
3331 // Utf8 encode the Utf16 string first. Next urlencode it.
3332 CStringA utf8str = WideToUtf8(str);
3333 ASSERT1(!utf8str.IsEmpty());
3334 DWORD buf_len = INTERNET_MAX_URL_LENGTH;
3335 CStringA escaped_utf8_name;
3336 HRESULT hr = ::UrlEscapeA(utf8str,
3337 CStrBufA(escaped_utf8_name, buf_len),
3338 &buf_len,
3339 0);
3340 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
3341 ASSERT1(escaped_utf8_name.GetLength() == static_cast<int>(buf_len));
3342 if (FAILED(hr)) {
3343 UTIL_LOG(LE, (_T("[UrlEscapeA failed][0x%08x]"), hr));
3344 return hr;
3345 }
3346
3347 *out = CString(escaped_utf8_name);
3348 return S_OK;
3349 }
3350
3351 HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out) {
3352 ASSERT1(out);
3353
3354 out->Empty();
3355 if (str.IsEmpty()) {
3356 return S_OK;
3357 }
3358
3359 // The value is a utf8 encoded url escaped string that is stored as a
3360 // unicode string. Because of this, it should contain only ascii chars.
3361 if (!ContainsOnlyAsciiChars(str)) {
3362 UTIL_LOG(LE, (_T("[String contains non ascii chars]")));
3363 return E_INVALIDARG;
3364 }
3365
3366 CStringA escaped_utf8_val = WideToAnsiDirect(str);
3367 DWORD buf_len = INTERNET_MAX_URL_LENGTH;
3368 CStringA unescaped_val;
3369 HRESULT hr = ::UrlUnescapeA(const_cast<char*>(escaped_utf8_val.GetString()),
3370 CStrBufA(unescaped_val, buf_len),
3371 &buf_len,
3372 0);
3373 ASSERT1(unescaped_val.GetLength() == static_cast<int>(buf_len));
3374 if (FAILED(hr)) {
3375 UTIL_LOG(LE, (_T("[UrlUnescapeA failed][0x%08x]"), hr));
3376 return hr;
3377 }
3378 ASSERT1(buf_len == static_cast<DWORD>(unescaped_val.GetLength()));
3379 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);
3380 CString app_name = Utf8ToWideChar(unescaped_val,
3381 unescaped_val.GetLength());
3382 if (app_name.IsEmpty()) {
3383 return E_INVALIDARG;
3384 }
3385
3386 *out = app_name;
3387 return S_OK;
3388 }
3389
3390 } // namespace omaha
3391
OLDNEW
« no previous file with comments | « base/string.h ('k') | base/string_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698