OLD | NEW |
| (Empty) |
1 // Copyright 2003-2009 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 // ======================================================================== | |
15 | |
16 #include "omaha/base/string.h" | |
17 | |
18 #include <wininet.h> // For INTERNET_MAX_URL_LENGTH. | |
19 #include <algorithm> | |
20 #include <cstdlib> | |
21 #include "base/scoped_ptr.h" | |
22 #include "omaha/base/commontypes.h" | |
23 #include "omaha/base/debug.h" | |
24 #include "omaha/base/localization.h" | |
25 #include "omaha/base/logging.h" | |
26 | |
27 namespace omaha { | |
28 | |
29 namespace { | |
30 // Testing shows that only the following ASCII characters are | |
31 // considered spaces by GetStringTypeA: 9-13, 32, 160. | |
32 // Rather than call GetStringTypeA with no locale, as we used to, | |
33 // we look up the values directly in a precomputed array. | |
34 | |
35 SELECTANY byte spaces[256] = { | |
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0-9 | |
37 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 10-19 | |
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29 | |
39 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 30-39 | |
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49 | |
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59 | |
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-69 | |
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79 | |
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89 | |
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99 | |
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109 | |
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119 | |
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129 | |
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139 | |
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149 | |
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 150-159 | |
52 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160-169 | |
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 170-179 | |
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 180-189 | |
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 190-199 | |
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 200-209 | |
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 210-219 | |
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 220-229 | |
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 230-239 | |
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 240-249 | |
61 0, 0, 0, 0, 0, 1, // 250-255 | |
62 }; | |
63 } // namespace | |
64 | |
65 const TCHAR* const kFalse = _T("false"); | |
66 const TCHAR* const kTrue = _T("true"); | |
67 | |
68 bool IsSpaceW(WCHAR c) { | |
69 // GetStringTypeW considers these characters to be spaces: | |
70 // 9-13, 32, 133, 160, 5760, 8192-8203, 8232, 8233, 12288 | |
71 if (c < 256) | |
72 return (c == 133 || IsSpaceA((char) (c & 0xff))); | |
73 | |
74 return (c >= 8192 && c <= 8203) || c == 8232 || | |
75 c == 8233 || c == 12288; | |
76 } | |
77 | |
78 bool IsSpaceA(char c) { | |
79 return spaces[static_cast<unsigned char>(c)] == 1; | |
80 } | |
81 | |
82 int TrimCString(CString &s) { | |
83 int len = Trim(s.GetBuffer()); | |
84 s.ReleaseBufferSetLength(len); | |
85 return len; | |
86 } | |
87 | |
88 void MakeLowerCString(CString & s) { | |
89 int len = s.GetLength(); | |
90 String_FastToLower(s.GetBuffer()); | |
91 s.ReleaseBufferSetLength(len); | |
92 } | |
93 | |
94 int Trim(TCHAR *s) { | |
95 ASSERT(s, (L"")); | |
96 | |
97 // First find end of leading spaces | |
98 TCHAR *start = s; | |
99 while (*start) { | |
100 if (!IsSpace(*start)) | |
101 break; | |
102 ++start; | |
103 } | |
104 | |
105 // Now search for the end, remembering the start of the last spaces | |
106 TCHAR *end = start; | |
107 TCHAR *last_space = end; | |
108 while (*end) { | |
109 if (!IsSpace(*end)) | |
110 last_space = end + 1; | |
111 ++end; | |
112 } | |
113 | |
114 // Copy the part we want | |
115 int len = last_space - start; | |
116 // lint -e{802} Conceivably passing a NULL pointer | |
117 memmove(s, start, len * sizeof(TCHAR)); | |
118 | |
119 // 0 terminate | |
120 s[len] = 0; | |
121 | |
122 return len; | |
123 } | |
124 | |
125 void TrimString(CString& s, const TCHAR* delimiters) { | |
126 s = s.Trim(delimiters); | |
127 } | |
128 | |
129 // Strip the first token from the front of argument s. A token is a | |
130 // series of consecutive non-blank characters - unless the first | |
131 // character is a double-quote ("), in that case the token is the full | |
132 // quoted string | |
133 CString StripFirstQuotedToken(const CString& s) { | |
134 const int npos = -1; | |
135 | |
136 // Make a writeable copy | |
137 CString str(s); | |
138 | |
139 // Trim any surrounding blanks (and tabs, for the heck of it) | |
140 TrimString(str, L" \t"); | |
141 | |
142 // Too short to have a second token | |
143 if (str.GetLength() <= 1) | |
144 return L""; | |
145 | |
146 // What kind of token are we stripping? | |
147 if (str[0] == L'\"') { | |
148 // Remove leading quoting string | |
149 int i = str.Find(L"\"", 1); | |
150 if (i != npos) | |
151 i++; | |
152 return str.Mid(i); | |
153 } else { | |
154 // Remove leading token | |
155 int i = str.FindOneOf(L" \t"); | |
156 if (i != npos) | |
157 i++; | |
158 return str.Mid(i); | |
159 } | |
160 } | |
161 | |
162 // A block of text to separate lines, and back | |
163 void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CStrin
g>* lines) { | |
164 ASSERT(delimiter, (L"")); | |
165 ASSERT(lines, (L"")); | |
166 | |
167 size_t delimiter_len = ::lstrlen(delimiter); | |
168 int b = 0; | |
169 int e = 0; | |
170 | |
171 for (b = 0; e != -1 && b < text.GetLength(); b = e + delimiter_len) { | |
172 e = text.Find(delimiter, b); | |
173 if (e != -1) { | |
174 ASSERT1(e - b > 0); | |
175 lines->push_back(text.Mid(b, e - b)); | |
176 } else { | |
177 lines->push_back(text.Mid(b)); | |
178 } | |
179 } | |
180 } | |
181 | |
182 void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CStr
ing* text) { | |
183 ASSERT(delimiter, (L"")); | |
184 ASSERT(text, (L"")); | |
185 | |
186 size_t delimiter_len = ::lstrlen(delimiter); | |
187 size_t len = 0; | |
188 for (size_t i = 0; i < lines.size(); ++i) { | |
189 len += lines[i].GetLength() + delimiter_len; | |
190 } | |
191 text->Empty(); | |
192 text->Preallocate(len); | |
193 for (std::vector<CString>::size_type i = 0; i < lines.size(); ++i) { | |
194 text->Append(lines[i]); | |
195 if (delimiter_len) { | |
196 text->Append(delimiter); | |
197 } | |
198 } | |
199 } | |
200 | |
201 int CleanupWhitespaceCString(CString &s) { | |
202 int len = CleanupWhitespace(s.GetBuffer()); | |
203 s.ReleaseBufferSetLength(len); | |
204 return len; | |
205 } | |
206 | |
207 int CleanupWhitespace(TCHAR *str) { | |
208 ASSERT(str, (L"")); | |
209 | |
210 TCHAR *src = str; | |
211 TCHAR *dest = str; | |
212 int spaces = 0; | |
213 bool at_start = true; | |
214 while (true) { | |
215 // At end of string? | |
216 TCHAR c = *src; | |
217 if (0 == c) | |
218 break; | |
219 | |
220 // Look for whitespace; copy it over if not whitespace | |
221 if (IsSpace(c)) { | |
222 ++spaces; | |
223 } | |
224 else { | |
225 *dest++ = c; | |
226 at_start = false; | |
227 spaces = 0; | |
228 } | |
229 | |
230 // Write only first consecutive space (but skip space at start) | |
231 if (1 == spaces && !at_start) | |
232 *dest++ = ' '; | |
233 | |
234 ++src; | |
235 } | |
236 | |
237 // Remove trailing space, if any | |
238 if (dest > str && *(dest - 1) == L' ') | |
239 --dest; | |
240 | |
241 // 0-terminate | |
242 *dest = 0; | |
243 | |
244 return dest - str; | |
245 } | |
246 | |
247 // Take 1 single hexadecimal "digit" (as a character) and return its decimal val
ue | |
248 // Returns -1 if given invalid hex digit | |
249 int HexDigitToDec(const TCHAR digit) { | |
250 if (digit >= L'A' && digit <= L'F') | |
251 return 10 + (digit - L'A'); | |
252 else if (digit >= L'a' && digit <= L'f') | |
253 return 10 + (digit - L'a'); | |
254 else if (digit >= L'0' && digit <= L'9') | |
255 return (digit - L'0'); | |
256 else | |
257 return -1; | |
258 } | |
259 | |
260 // Convert the 2 hex chars at positions <pos> and <pos>+1 in <s> to a char (<cha
r_out>) | |
261 // Note: scanf was giving me troubles, so here's the manual version | |
262 // Extracted char gets written to <char_out>, which must be allocated by | |
263 // the caller; return true on success or false if parameters are incorrect | |
264 // or string does not have 2 hex digits at the specified position | |
265 // NOTE: <char_out> is NOT a string, just a pointer to a char for the result | |
266 bool ExtractChar(const CString & s, int pos, unsigned char * char_out) { | |
267 // char_out may be NULL | |
268 | |
269 if (s.GetLength() < pos + 1) { | |
270 return false; | |
271 } | |
272 | |
273 if (pos < 0 || NULL == char_out) { | |
274 ASSERT(0, (_T("invalid params: pos<0 or char_out is NULL"))); | |
275 return false; | |
276 } | |
277 | |
278 TCHAR c1 = s.GetAt(pos); | |
279 TCHAR c2 = s.GetAt(pos+1); | |
280 | |
281 int p1 = HexDigitToDec(c1); | |
282 int p2 = HexDigitToDec(c2); | |
283 | |
284 if (p1 == -1 || p2 == -1) { | |
285 return false; | |
286 } | |
287 | |
288 *char_out = (unsigned char)(p1 * 16 + p2); | |
289 return true; | |
290 } | |
291 | |
292 WCHAR *ToWide (const char *s, int len) { | |
293 ASSERT (s, (L"")); | |
294 WCHAR *w = new WCHAR [len+1]; if (!w) { return NULL; } | |
295 // int rc = MultiByteToWideChar (CP_ACP, 0, s.GetString(), (int)s.GetLength(
)+1, w, s.GetLength()+1); | |
296 // TODO(omaha): why would it ever be the case that rc > len? | |
297 int rc = MultiByteToWideChar (CP_ACP, 0, s, len, w, len); | |
298 if (rc > len) { delete [] w; return NULL; } | |
299 // ASSERT (rc <= len, (L"")); | |
300 w[rc]=L'\0'; | |
301 return w; | |
302 } | |
303 | |
304 const byte *BufferContains (const byte *buf, uint32 buf_len, const byte *data, u
int32 data_len) { | |
305 ASSERT(data, (L"")); | |
306 ASSERT(buf, (L"")); | |
307 | |
308 for (uint32 i = 0; i < buf_len; i++) { | |
309 uint32 j = i; | |
310 uint32 k = 0; | |
311 uint32 len = 0; | |
312 while (j < buf_len && k < data_len && buf[j++] == data[k++]) { len++; } | |
313 if (len == data_len) { return buf + i; } | |
314 } | |
315 return 0; | |
316 } | |
317 | |
318 // Converting the Ansi Multibyte String into unicode string. The multibyte | |
319 // string is encoded using the specified codepage. | |
320 // The code is pretty much like the U2W function, except the codepage can be | |
321 // any valid windows CP. | |
322 BOOL AnsiToWideString(const char *from, int length, UINT codepage, CString *to)
{ | |
323 ASSERT(from, (L"")); | |
324 ASSERT(to, (L"")); | |
325 ASSERT1(length >= -1); | |
326 // Figure out how long the string is | |
327 int req_chars = MultiByteToWideChar(codepage, 0, from, length, NULL, 0); | |
328 | |
329 if (req_chars <= 0) { | |
330 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed "))); | |
331 *to = AnsiToWideString(from, length); | |
332 return FALSE; | |
333 } | |
334 | |
335 TCHAR *buffer = to->GetBufferSetLength(req_chars); | |
336 int conv_chars = MultiByteToWideChar(codepage, 0, from, length, buffer, req_ch
ars); | |
337 if (conv_chars == 0) { | |
338 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed "))); | |
339 to->ReleaseBuffer(0); | |
340 *to = AnsiToWideString(from, length); | |
341 return FALSE; | |
342 } | |
343 | |
344 // Something truly horrible happened. | |
345 ASSERT (req_chars == conv_chars, (L"MBToWide returned unexpected value: GetLas
tError()=%d",GetLastError())); | |
346 // If length was inferred, conv_chars includes the null terminator. | |
347 // Adjust the length here to remove null termination, | |
348 // because we use the length-qualified CString constructor, | |
349 // which automatically adds null termination given an unterminated array. | |
350 if (-1 == length) { --conv_chars; } | |
351 to->ReleaseBuffer(conv_chars); | |
352 return TRUE; | |
353 } | |
354 | |
355 // CStringW(const char* from) did not cast all character properly | |
356 // so we write our own. | |
357 CString AnsiToWideString(const char *from, int length) { | |
358 ASSERT(from, (L"")); | |
359 ASSERT1(length >= -1); | |
360 if (length < 0) | |
361 length = strlen(from); | |
362 CString to; | |
363 TCHAR *buffer = to.GetBufferSetLength(length); | |
364 for (int i = 0; i < length; ++i) | |
365 buffer[i] = static_cast<UINT8>(from[i]); | |
366 to.ReleaseBuffer(length); | |
367 return to; | |
368 } | |
369 | |
370 | |
371 // Transform a unicode string into UTF8, as represented in an ASCII string | |
372 CStringA WideToUtf8(const CString& w) { | |
373 // Add a cutoff. If it's all ascii, convert it directly | |
374 const TCHAR* input = static_cast<const TCHAR*>(w.GetString()); | |
375 int input_len = w.GetLength(), i; | |
376 for (i = 0; i < input_len; ++i) { | |
377 if (input[i] > 127) { | |
378 break; | |
379 } | |
380 } | |
381 | |
382 // If we made it to the end without breaking, then it's all ANSI, so do a quic
k convert | |
383 if (i == input_len) { | |
384 return WideToAnsiDirect(w); | |
385 } | |
386 | |
387 // Figure out how long the string is | |
388 int req_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL); | |
389 | |
390 scoped_array<char> utf8_buffer(new char[req_bytes]); | |
391 | |
392 int conv_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8_buffer.get(), r
eq_bytes, NULL, NULL); | |
393 ASSERT1(req_bytes == conv_bytes); | |
394 | |
395 // conv_bytes includes the null terminator, when we read this in, don't read t
he terminator | |
396 CStringA out(utf8_buffer.get(), conv_bytes - 1); | |
397 | |
398 return out; | |
399 } | |
400 | |
401 CString Utf8ToWideChar(const char* utf8, uint32 num_bytes) { | |
402 ASSERT1(utf8); | |
403 if (num_bytes == 0) { | |
404 return CString(); | |
405 } | |
406 | |
407 uint32 number_of_wide_chars = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_byte
s, NULL, 0); | |
408 number_of_wide_chars += 1; // make room for NULL terminator | |
409 | |
410 CString ret_string; | |
411 TCHAR* buffer = ret_string.GetBuffer(number_of_wide_chars); | |
412 DWORD number_of_characters_copied = ::MultiByteToWideChar(CP_UTF8, 0, utf8, nu
m_bytes, buffer, number_of_wide_chars); | |
413 ASSERT1(number_of_characters_copied == number_of_wide_chars - 1); | |
414 buffer[number_of_wide_chars - 1] = _T('\0'); // ensure there is a NULL termin
ator | |
415 ret_string.ReleaseBuffer(); | |
416 | |
417 // Strip the byte order marker if there is one in the document. | |
418 if (ret_string[0] == kUnicodeBom) { | |
419 ret_string = ret_string.Right(ret_string.GetLength() - 1); | |
420 } | |
421 | |
422 if (number_of_characters_copied > 0) { | |
423 return ret_string; | |
424 } | |
425 | |
426 // Failure case | |
427 return CString(); | |
428 } | |
429 | |
430 CString Utf8BufferToWideChar(const std::vector<uint8>& buffer) { | |
431 CString result; | |
432 if (!buffer.empty()) { | |
433 result = Utf8ToWideChar( | |
434 reinterpret_cast<const char*>(&buffer.front()), buffer.size()); | |
435 } | |
436 return result; | |
437 } | |
438 | |
439 CString AbbreviateString (const CString & title, int32 max_len) { | |
440 ASSERT (max_len, (L"")); | |
441 CString s(title); | |
442 TrimCString(s); // remove whitespace at start/end | |
443 if (s.GetLength() > max_len) { | |
444 s = s.Left (max_len - 2); | |
445 CString orig(s); | |
446 // remove partial words | |
447 while (s.GetLength() > 1 && !IsSpace(s[s.GetLength()-1])) { s = s.Left (
s.GetLength() - 1); } | |
448 // but not if it would make the string very short | |
449 if (s.GetLength() < max_len / 2) { s = orig; } | |
450 s += _T(".."); | |
451 } | |
452 | |
453 return s; | |
454 } | |
455 | |
456 CString GetAbsoluteUri(const CString& uri) { | |
457 int i = String_FindString(uri, _T("://")); | |
458 if (i==-1) return uri; | |
459 | |
460 // add trailing / if none exists | |
461 int j = String_FindChar(uri, L'/',i+3); | |
462 if (j==-1) return (uri+NOTRANSL(_T("/"))); | |
463 | |
464 // remove duplicate trailing slashes | |
465 int len = uri.GetLength(); | |
466 if (len > 1 && uri.GetAt(len-1) == '/' && uri.GetAt(len-2) == '/') { | |
467 CString new_uri(uri); | |
468 int new_len = new_uri.GetLength(); | |
469 while (new_len > 1 && new_uri.GetAt(new_len-1) == '/' && new_uri.GetAt(new_l
en-2) == '/') { | |
470 new_len--; | |
471 new_uri = new_uri.Left(new_len); | |
472 } | |
473 return new_uri; | |
474 } | |
475 else return uri; | |
476 } | |
477 | |
478 // requires that input have a PROTOCOL (http://) for proper behavior | |
479 // items with the "file" protocol are returned as is (what is the hostname in th
at case? C: ? doesn't make sense) | |
480 // TODO(omaha): loosen requirement | |
481 // includes http://, e.g. http://www.google.com/ | |
482 CString GetUriHostName(const CString& uri, bool strip_leading) { | |
483 if (String_StartsWith(uri,NOTRANSL(_T("file:")),true)) return uri; | |
484 | |
485 // correct any "errors" | |
486 CString s(GetAbsoluteUri(uri)); | |
487 | |
488 // Strip the leading "www." | |
489 if (strip_leading) | |
490 { | |
491 int index_www = String_FindString(s, kStrLeadingWww); | |
492 if (index_www != -1) | |
493 ReplaceCString (s, kStrLeadingWww, _T("")); | |
494 } | |
495 | |
496 int i = String_FindString(s, _T("://")); | |
497 if(i==-1) return uri; | |
498 int j = String_FindChar(s, L'/',i+3); | |
499 if(j==-1) return uri; | |
500 return s.Left(j+1); | |
501 } | |
502 | |
503 // requires that input have a PROTOCOL (http://) for proper behavior | |
504 // TODO(omaha): loosen requirement | |
505 // removes the http:// and the extra slash '/' at the end. | |
506 // http://www.google.com/ -> www.google.com (or google.com if strip_leading = tr
ue) | |
507 CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading) { | |
508 CString s(GetUriHostName(uri,strip_leading)); | |
509 | |
510 // remove protocol | |
511 int i = String_FindString (s, _T("://")); | |
512 if(i==-1) return s; | |
513 CString ss(s.Right (s.GetLength() - i-3)); | |
514 | |
515 // remove the last '/' | |
516 int j = ss.ReverseFind('/'); | |
517 if (j == -1) return ss; | |
518 return ss.Left(j); | |
519 } | |
520 | |
521 CString AbbreviateUri(const CString& uri, int32 max_len) { | |
522 ASSERT1(max_len); | |
523 ASSERT1(!uri.IsEmpty()); | |
524 | |
525 CString s(uri); | |
526 VERIFY1(String_FindString (s, _T("://"))); | |
527 | |
528 TrimCString(s); | |
529 // SKIP_LOC_BEGIN | |
530 RemoveFromStart (s, _T("ftp://"), false); | |
531 RemoveFromStart (s, _T("http://"), false); | |
532 RemoveFromStart (s, _T("https://"), false); | |
533 RemoveFromStart (s, _T("www."), false); | |
534 RemoveFromStart (s, _T("ftp."), false); | |
535 RemoveFromStart (s, _T("www-"), false); | |
536 RemoveFromStart (s, _T("ftp-"), false); | |
537 RemoveFromEnd (s, _T(".htm")); | |
538 RemoveFromEnd (s, _T(".html")); | |
539 RemoveFromEnd (s, _T(".asp")); | |
540 // SKIP_LOC_END | |
541 if (s.GetLength() > max_len) { | |
542 // try to keep the portion after the last / | |
543 int32 last_slash = s.ReverseFind ((TCHAR)'/'); | |
544 CString after_last_slash; | |
545 if (last_slash == -1) { after_last_slash = _T(""); } | |
546 else { after_last_slash = s.Right (uri.GetLength() - last_slash - 1); } | |
547 if (after_last_slash.GetLength() > max_len / 2) { | |
548 after_last_slash = after_last_slash.Right (max_len / 2); | |
549 } | |
550 s = s.Left (max_len - after_last_slash.GetLength() - 2); | |
551 s += ".."; | |
552 s += after_last_slash; | |
553 } | |
554 return s; | |
555 } | |
556 | |
557 // normalized version of a URI intended to map duplicates to the same string | |
558 // the normalized URI is not a valid URI | |
559 CString NormalizeUri (const CString & uri) { | |
560 CString s(uri); | |
561 TrimCString(s); | |
562 MakeLowerCString(s); | |
563 // SKIP_LOC_BEGIN | |
564 ReplaceCString (s, _T(":80"), _T("")); | |
565 | |
566 RemoveFromEnd (s, _T("/index.html")); | |
567 RemoveFromEnd (s, _T("/welcome.html")); // old netscape standard | |
568 RemoveFromEnd (s, _T("/")); | |
569 | |
570 RemoveFromStart (s, _T("ftp://"), false); | |
571 RemoveFromStart (s, _T("http://"), false); | |
572 RemoveFromStart (s, _T("https://"), false); | |
573 RemoveFromStart (s, _T("www."), false); | |
574 RemoveFromStart (s, _T("ftp."), false); | |
575 RemoveFromStart (s, _T("www-"), false); | |
576 RemoveFromStart (s, _T("ftp-"), false); | |
577 | |
578 ReplaceCString (s, _T("/./"), _T("/")); | |
579 // SKIP_LOC_END | |
580 | |
581 // TODO(omaha): | |
582 // fixup URLs like a/b/../../c | |
583 // while ($s =~ m!\/\.\.\!!) { | |
584 // $s =~ s!/[^/]*/\.\./!/!; | |
585 // } | |
586 | |
587 // TODO(omaha): | |
588 // unescape characters | |
589 // Note from RFC1630: "Sequences which start with a percent sign | |
590 // but are not followed by two hexadecimal characters are reserved | |
591 // for future extension" | |
592 // $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; | |
593 | |
594 return s; | |
595 } | |
596 | |
597 CString RemoveInternetProtocolHeader (const CString& url) { | |
598 int find_colon_slash_slash = String_FindString(url, NOTRANSL(L"://")); | |
599 if( find_colon_slash_slash != -1 ) { | |
600 // remove PROTOCOL:// | |
601 return url.Right(url.GetLength() - find_colon_slash_slash - 3); | |
602 } else if (String_StartsWith(url, NOTRANSL(L"mailto:"), true)) { | |
603 // remove "mailto:" | |
604 return url.Right(url.GetLength() - 7); | |
605 } else { | |
606 // return as is | |
607 return url; | |
608 } | |
609 } | |
610 | |
611 HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out) { | |
612 ASSERT1(path_out); | |
613 ASSERT1(uri.GetLength() < INTERNET_MAX_URL_LENGTH); | |
614 | |
615 if (uri.IsEmpty()) { | |
616 return E_INVALIDARG; | |
617 } | |
618 | |
619 DWORD buf_len = MAX_PATH; | |
620 HRESULT hr = ::PathCreateFromUrl(uri, | |
621 CStrBuf(*path_out, MAX_PATH), | |
622 &buf_len, | |
623 NULL); | |
624 return hr; | |
625 } | |
626 | |
627 void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case) { | |
628 ASSERT(remove, (L"")); | |
629 | |
630 // Remove the characters if it is the prefix | |
631 if (String_StartsWith(s, remove, ignore_case)) | |
632 s.Delete(0, lstrlen(remove)); | |
633 } | |
634 | |
635 bool String_EndsWith(const TCHAR *str, const TCHAR *end_str, bool ignore_case) { | |
636 ASSERT(end_str, (L"")); | |
637 ASSERT(str, (L"")); | |
638 | |
639 int str_len = lstrlen(str); | |
640 int end_len = lstrlen(end_str); | |
641 | |
642 // Definitely false if the suffix is longer than the string | |
643 if (end_len > str_len) | |
644 return false; | |
645 | |
646 const TCHAR *str_ptr = str + str_len; | |
647 const TCHAR *end_ptr = end_str + end_len; | |
648 | |
649 while (end_ptr >= end_str) { | |
650 // Check for matching characters | |
651 TCHAR c1 = *str_ptr; | |
652 TCHAR c2 = *end_ptr; | |
653 | |
654 if (ignore_case) { | |
655 c1 = Char_ToLower(c1); | |
656 c2 = Char_ToLower(c2); | |
657 } | |
658 | |
659 if (c1 != c2) | |
660 return false; | |
661 | |
662 --str_ptr; | |
663 --end_ptr; | |
664 } | |
665 | |
666 // if we haven't failed out, it must be ok! | |
667 return true; | |
668 } | |
669 | |
670 CString String_MakeEndWith(const TCHAR* str, const TCHAR* end_str, bool ignore_c
ase) { | |
671 if (String_EndsWith(str, end_str, ignore_case)) { | |
672 return str; | |
673 } else { | |
674 CString r(str); | |
675 r += end_str; | |
676 return r; | |
677 } | |
678 } | |
679 | |
680 void RemoveFromEnd (CString & s, const TCHAR* remove) { | |
681 ASSERT(remove, (L"")); | |
682 | |
683 // If the suffix is shorter than the string, don't bother | |
684 int remove_len = lstrlen(remove); | |
685 if (s.GetLength() < remove_len) return; | |
686 | |
687 // If the suffix is equal | |
688 int suffix_begin = s.GetLength() - remove_len; | |
689 if (0 == lstrcmp(s.GetString() + suffix_begin, remove)) | |
690 s.Delete(suffix_begin, remove_len); | |
691 } | |
692 | |
693 CString ElideIfNeeded (const CString & input_string, int max_len, int min_len) { | |
694 ASSERT (min_len <= max_len, (L"")); | |
695 ASSERT (max_len >= TSTR_SIZE(kEllipsis)+1, (L"")); | |
696 ASSERT (min_len >= TSTR_SIZE(kEllipsis)+1, (L"")); | |
697 | |
698 CString s = input_string; | |
699 | |
700 s.TrimRight(); | |
701 if (s.GetLength() > max_len) { | |
702 int truncate_at = max_len - TSTR_SIZE(kEllipsis); | |
703 // find first space going backwards from character one after the truncation
point | |
704 while (truncate_at >= min_len && !IsSpace(s.GetAt(truncate_at))) | |
705 truncate_at--; | |
706 | |
707 // skip the space(s) | |
708 while (truncate_at >= min_len && IsSpace(s.GetAt(truncate_at))) | |
709 truncate_at--; | |
710 | |
711 truncate_at++; | |
712 | |
713 if (truncate_at <= min_len || truncate_at > (max_len - static_cast<int>(TSTR
_SIZE(kEllipsis)))) { | |
714 // we weren't able to break at a word boundary, may as well use more of th
e string | |
715 truncate_at = max_len - TSTR_SIZE(kEllipsis); | |
716 | |
717 // skip space(s) | |
718 while (truncate_at > 0 && IsSpace(s.GetAt(truncate_at-1))) | |
719 truncate_at--; | |
720 } | |
721 | |
722 s = s.Left(truncate_at); | |
723 s += kEllipsis; | |
724 } | |
725 | |
726 UTIL_LOG(L6, (L"elide (%d %d) %s -> %s", min_len, max_len, input_string, s)); | |
727 return s; | |
728 } | |
729 | |
730 // these functions untested | |
731 // UTF8 parameter supported on XP/2000 only | |
732 HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len) { | |
733 ASSERT (dest_len, (L"")); | |
734 ASSERT (dest, (L"")); | |
735 ASSERT (src, (L"")); | |
736 | |
737 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode | |
738 // then use WideCharToMultiByte to convert from Unicode to UTF8 | |
739 WCHAR *unicode = new WCHAR [(src_len + 1) * sizeof (TCHAR)]; ASSERT (unicode,
(L"")); | |
740 int chars_written = MultiByteToWideChar (CP_ACP, 0, src, src_len, unicode, src
_len); | |
741 ASSERT (chars_written == src_len, (L"")); | |
742 char *unmappable = " "; | |
743 BOOL unmappable_characters = false; | |
744 *dest_len = WideCharToMultiByte (CP_UTF8, 0, unicode, chars_written, dest, *de
st_len, unmappable, &unmappable_characters); | |
745 delete [] unicode; | |
746 return S_OK; | |
747 } | |
748 | |
749 // Convert Wide to ANSI directly. Use only when it is all ANSI | |
750 CStringA WideToAnsiDirect(const CString & in) { | |
751 int in_len = in.GetLength(); | |
752 const TCHAR * in_buf = static_cast<const TCHAR*>(in.GetString()); | |
753 | |
754 CStringA out; | |
755 unsigned char * out_buf = (unsigned char *)out.GetBufferSetLength(in_len); | |
756 | |
757 for(int i = 0; i < in_len; ++i) | |
758 out_buf[i] = static_cast<unsigned char>(in_buf[i]); | |
759 | |
760 out.ReleaseBuffer(in_len); | |
761 return out; | |
762 } | |
763 | |
764 HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len) { | |
765 ASSERT(dest_len, (L"")); | |
766 ASSERT(dest, (L"")); | |
767 | |
768 *dest_len = WideCharToMultiByte (CP_UTF8, 0, src, src_len, dest, *dest_len, NU
LL,NULL); | |
769 return S_OK; | |
770 } | |
771 | |
772 HRESULT UTF8ToUCS2 (const char * src, int src_len, LPWSTR dest, int *dest_len) { | |
773 ASSERT (dest_len, (L"")); | |
774 ASSERT (src, (L"")); | |
775 | |
776 *dest_len = MultiByteToWideChar (CP_UTF8, 0, src, src_len, dest, *dest_len); | |
777 ASSERT (*dest_len == src_len, (L"")); | |
778 return S_OK; | |
779 } | |
780 | |
781 HRESULT UTF8ToAnsi (char * src, int, char * dest, int *dest_len) { | |
782 ASSERT(dest_len, (L"")); | |
783 ASSERT(dest, (L"")); | |
784 ASSERT(src, (L"")); | |
785 | |
786 src; dest; dest_len; // unreferenced formal parameter | |
787 | |
788 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode | |
789 // then use WideCharToMultiByte to convert from Unicode to ANSI | |
790 return E_FAIL; | |
791 } | |
792 | |
793 // clean up a string so it can be included within a JavaScript string | |
794 // mainly involves escaping characters | |
795 CString SanitizeString(const CString & in, DWORD mode) { | |
796 CString out(in); | |
797 | |
798 if (mode & kSanHtml) { | |
799 // SKIP_LOC_BEGIN | |
800 ReplaceCString(out, _T("&"), _T("&")); | |
801 ReplaceCString(out, _T("<"), _T("<")); | |
802 ReplaceCString(out, _T(">"), _T(">")); | |
803 // SKIP_LOC_END | |
804 } | |
805 | |
806 if ((mode & kSanXml) == kSanXml) { | |
807 // SKIP_LOC_BEGIN | |
808 ReplaceCString(out, _T("'"), _T("'")); | |
809 ReplaceCString(out, _T("\""), _T(""")); | |
810 // SKIP_LOC_END | |
811 } | |
812 | |
813 // Note that this SAN_JAVASCRIPT and kSanXml should not be used together. | |
814 ASSERT ((mode & (kSanJs | kSanXml)) != (kSanJs | kSanXml), (L"")); | |
815 | |
816 if ((mode & kSanJs) == kSanJs) { | |
817 // SKIP_LOC_BEGIN | |
818 ReplaceCString(out, _T("\\"), _T("\\\\")); | |
819 ReplaceCString(out, _T("\'"), _T("\\\'")); | |
820 ReplaceCString(out, _T("\""), _T("\\\"")); | |
821 ReplaceCString(out, _T("\n"), _T(" ")); | |
822 ReplaceCString(out, _T("\t"), _T(" ")); | |
823 // SKIP_LOC_END | |
824 } | |
825 | |
826 if ((mode & kSanHtmlInput) == kSanHtmlInput) { | |
827 // SKIP_LOC_BEGIN | |
828 ReplaceCString(out, _T("\""), _T(""")); | |
829 ReplaceCString(out, _T("'"), _T("'")); | |
830 // SKIP_LOC_END | |
831 } | |
832 | |
833 return out; | |
834 } | |
835 | |
836 // Bolds the periods used for abbreviation. Call this after HighlightTerms. | |
837 CString BoldAbbreviationPeriods(const CString & in) { | |
838 CString out(in); | |
839 CString abbrev; | |
840 for (int i = 0; i < kAbbreviationPeriodLength; ++i) | |
841 abbrev += _T("."); | |
842 ReplaceCString(out, abbrev, NOTRANSL(_T("<b>")) + abbrev + NOTRANSL(_T("</b>")
)); | |
843 return out; | |
844 } | |
845 | |
846 // Unescape a escaped sequence leading by a percentage symbol '%', | |
847 // and converted the unescaped sequence (in UTF8) into unicode. | |
848 // Inputs: src is the input string. | |
849 // pos is the starting position. | |
850 // Returns: true if a EOS(null) char was encounted. | |
851 // out contains the unescaped and converted unicode string. | |
852 // consumed_length is how many bytes in the src string have been | |
853 // unescaped. | |
854 // We can avoid the expensive UTF8 conversion step if there are no higher | |
855 // ansi characters So if there aren't any, just convert it ANSI-to-WIDE | |
856 // directly, which is cheaper. | |
857 inline bool UnescapeSequence(const CString &src, int pos, | |
858 CStringW *out, int *consumed_length) { | |
859 ASSERT1(out); | |
860 ASSERT1(consumed_length); | |
861 | |
862 int length = src.GetLength(); | |
863 // (input_len - pos) / 3 is enough for un-escaping the (%xx)+ sequences. | |
864 int max_dst_length = (length - pos) / 3; | |
865 scoped_array<char> unescaped(new char[max_dst_length]); | |
866 char *buf = unescaped.get(); | |
867 if (buf == NULL) { // no enough space ??? | |
868 *consumed_length = 0; | |
869 return false; | |
870 } | |
871 char *dst = buf; | |
872 bool is_utf8 = false; | |
873 // It is possible that there is a null character '\0' in the sequence. | |
874 // Because the CStringT does't support '\0' in it, we stop | |
875 // parsing the input string when it is encounted. | |
876 bool eos_encounted = false; | |
877 uint8 ch; | |
878 int s = pos; | |
879 while (s + 2 < length && src[s] == '%' && !eos_encounted && | |
880 ExtractChar(src, s + 1, &ch)) { | |
881 if (ch != 0) | |
882 *dst++ = ch; | |
883 else | |
884 eos_encounted = true; | |
885 if (ch >= 128) | |
886 is_utf8 = true; | |
887 s += 3; | |
888 } | |
889 | |
890 ASSERT1(dst <= buf + max_dst_length); // just to make sure | |
891 | |
892 *consumed_length = s - pos; | |
893 if (is_utf8) | |
894 AnsiToWideString(buf, dst - buf, CP_UTF8, out); | |
895 else | |
896 *out = AnsiToWideString(buf, dst - buf); | |
897 return eos_encounted; | |
898 } | |
899 | |
900 // There is an encoding called "URL-encoding". This function takes a URL-encoded
string | |
901 // and converts it back to the original representation | |
902 // example: "?q=moon+doggy_%25%5E%26&" = "moon doggy_%^&" | |
903 CString Unencode(const CString &input) { | |
904 const int input_len = input.GetLength(); | |
905 const TCHAR *src = input.GetString(); | |
906 // input_len is enough for containing the unencoded string. | |
907 CString out; | |
908 TCHAR *head = out.GetBuffer(input_len); | |
909 TCHAR *dst = head; | |
910 int s = 0; | |
911 bool eos_encounted = false; | |
912 bool is_utf8 = false; | |
913 CStringW fragment; | |
914 int consumed_length = 0; | |
915 while (s < input_len && !eos_encounted) { | |
916 switch (src[s]) { | |
917 case '+' : | |
918 *dst++ = ' '; | |
919 ASSERT1(dst <= head + input_len); | |
920 ++s; | |
921 break; | |
922 case '%' : | |
923 eos_encounted = | |
924 UnescapeSequence(input, s, &fragment, &consumed_length); | |
925 if (consumed_length > 0) { | |
926 s += consumed_length; | |
927 ASSERT1(dst + fragment.GetLength() <= head + input_len); | |
928 for (int i = 0; i < fragment.GetLength(); ++i) | |
929 *dst++ = fragment[i]; | |
930 } else { | |
931 *dst++ = src[s++]; | |
932 ASSERT1(dst <= head + input_len); | |
933 } | |
934 break; | |
935 default: | |
936 *dst++ = src[s]; | |
937 ASSERT1(dst <= head + input_len); | |
938 ++s; | |
939 } | |
940 } | |
941 int out_len = dst - head; | |
942 out.ReleaseBuffer(out_len); | |
943 return out; | |
944 } | |
945 | |
946 CString GetTextInbetween(const CString &input, const CString &start, const CStri
ng &end) { | |
947 int start_index = String_FindString(input, start); | |
948 if (start_index == -1) | |
949 return L""; | |
950 | |
951 start_index += start.GetLength(); | |
952 int end_index = String_FindString(input, end, start_index); | |
953 if (end_index == -1) | |
954 return L""; | |
955 | |
956 return input.Mid(start_index, end_index - start_index); | |
957 } | |
958 | |
959 // Given a string, get the parameter and url-unencode it | |
960 CString GetParam(const CString & input, const CString & key) { | |
961 CString my_key(_T("?")); | |
962 my_key.Append(key); | |
963 my_key += L'='; | |
964 | |
965 return Unencode(GetTextInbetween(input, my_key, NOTRANSL(L"?"))); | |
966 } | |
967 | |
968 // Get an xml-like field from a string | |
969 CString GetField (const CString & input, const CString & field) { | |
970 CString start_field(NOTRANSL(_T("<"))); | |
971 start_field += field; | |
972 start_field += L'>'; | |
973 | |
974 int32 start = String_FindString(input, start_field); | |
975 if (start == -1) { return _T(""); } | |
976 start += 2 + lstrlen (field); | |
977 | |
978 CString end_field(NOTRANSL(_T("</"))); | |
979 end_field += field; | |
980 end_field += L'>'; | |
981 | |
982 int32 end = String_FindString(input, end_field); | |
983 if (end == -1) { return _T(""); } | |
984 | |
985 return input.Mid (start, end - start); | |
986 } | |
987 | |
988 // ------------------------------------------------------------ | |
989 // Finds a whole word match in the query. | |
990 // If the word has non-spaces either before or after, it will not qualify as | |
991 // a match. i.e. "pie!" is not a match because of the exclamation point. | |
992 // TODO(omaha): Add parameter that will consider punctuation acceptable. | |
993 // | |
994 // Optionally will look for a colon at the end. | |
995 // If not found, return -1. | |
996 int FindWholeWordMatch (const CString &query, | |
997 const CString &word_to_match, | |
998 const bool end_with_colon, | |
999 const int index_begin) { | |
1000 if (word_to_match.IsEmpty()) { | |
1001 return -1; | |
1002 } | |
1003 | |
1004 int index_word_begin = index_begin; | |
1005 | |
1006 // Keep going until we find a whole word match, or the string ends. | |
1007 do { | |
1008 index_word_begin = String_FindString (query, word_to_match, index_word_begin
); | |
1009 | |
1010 if (-1 == index_word_begin) { | |
1011 return index_word_begin; | |
1012 } | |
1013 | |
1014 // If it's not a whole word match, keep going. | |
1015 if (index_word_begin > 0 && | |
1016 !IsSpaceW (query[index_word_begin - 1])) { | |
1017 goto LoopEnd; | |
1018 } | |
1019 | |
1020 if (end_with_colon) { | |
1021 int index_colon = String_FindChar (query, L':', index_word_begin); | |
1022 | |
1023 // If there is no colon in the string, return now. | |
1024 if (-1 == index_colon) { | |
1025 return -1; | |
1026 } | |
1027 | |
1028 // If there is text between the end of the word and the colon, keep going. | |
1029 if (index_colon - index_word_begin != word_to_match.GetLength()) { | |
1030 goto LoopEnd; | |
1031 } | |
1032 } else { | |
1033 // If there are more chars left after this word/phrase, and | |
1034 // they are not spaces, return. | |
1035 if (query.GetLength() > index_word_begin + word_to_match.GetLength() && | |
1036 !IsSpaceW (query.GetAt (index_word_begin + word_to_match.GetLength())))
{ | |
1037 goto LoopEnd; | |
1038 } | |
1039 } | |
1040 | |
1041 // It fits all the requirements, so return the index to the beginning of the
word. | |
1042 return index_word_begin; | |
1043 | |
1044 LoopEnd: | |
1045 ++index_word_begin; | |
1046 | |
1047 } while (-1 != index_word_begin); | |
1048 | |
1049 return index_word_begin; | |
1050 } | |
1051 | |
1052 // -------------------------------------------------------- | |
1053 // Do whole-word replacement in "str". | |
1054 void ReplaceWholeWord (const CString &string_to_replace, | |
1055 const CString &replacement, | |
1056 const bool trim_whitespace, | |
1057 CString *str) { | |
1058 ASSERT (str, (L"ReplaceWholeWord")); | |
1059 | |
1060 if (string_to_replace.IsEmpty() || str->IsEmpty()) { | |
1061 return; | |
1062 } | |
1063 | |
1064 int index_str = 0; | |
1065 do { | |
1066 index_str = FindWholeWordMatch (*str, string_to_replace, false, index_str); | |
1067 | |
1068 if (-1 != index_str) { | |
1069 // Get the strings before and after, and trim whitespace. | |
1070 CString str_before_word(str->Left (index_str)); | |
1071 if (trim_whitespace) { | |
1072 str_before_word.TrimRight(); | |
1073 } | |
1074 | |
1075 CString str_after_word(str->Mid (index_str + string_to_replace.GetLength()
)); | |
1076 if (trim_whitespace) { | |
1077 str_after_word.TrimLeft(); | |
1078 } | |
1079 | |
1080 *str = str_before_word + replacement + str_after_word; | |
1081 index_str += replacement.GetLength() + 1; | |
1082 } | |
1083 } while (index_str != -1); | |
1084 } | |
1085 | |
1086 // -------------------------------------------------------- | |
1087 // Reverse (big-endian<->little-endian) the shorts that make up | |
1088 // Unicode characters in a byte array of Unicode chars | |
1089 HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes) { | |
1090 ASSERT (unicode_string, (L"")); | |
1091 | |
1092 // If odd # of bytes, just leave the last one alone | |
1093 for (int i = 0; i < size_in_bytes - 1; i += 2) { | |
1094 byte b = unicode_string[i]; | |
1095 unicode_string[i] = unicode_string[i+1]; | |
1096 unicode_string[i+1] = b; | |
1097 } | |
1098 | |
1099 return S_OK; | |
1100 } | |
1101 | |
1102 // case insensitive strstr | |
1103 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
1104 const char *stristr(const char *string, const char *pattern) | |
1105 { | |
1106 ASSERT (pattern, (L"")); | |
1107 ASSERT (string, (L"")); | |
1108 ASSERT (string && pattern, (L"")); | |
1109 char *pattern_ptr, *string_ptr; | |
1110 const char *start; | |
1111 | |
1112 for (start = string; *start != 0; start++) | |
1113 { | |
1114 // find start of pattern in string | |
1115 for ( ; ((*start!=0) && (String_ToUpperA(*start) != String_ToUpperA(*pattern
))); start++) | |
1116 ; | |
1117 if (0 == *start) | |
1118 return NULL; | |
1119 | |
1120 pattern_ptr = (char *)pattern; | |
1121 string_ptr = (char *)start; | |
1122 | |
1123 while (String_ToUpperA(*string_ptr) == String_ToUpperA(*pattern_ptr)) | |
1124 { | |
1125 string_ptr++; | |
1126 pattern_ptr++; | |
1127 | |
1128 // if end of pattern then pattern was found | |
1129 if (0 == *pattern_ptr) | |
1130 return (start); | |
1131 } | |
1132 } | |
1133 | |
1134 return NULL; | |
1135 } | |
1136 | |
1137 // case insensitive Unicode strstr | |
1138 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
1139 const WCHAR *stristrW(const WCHAR *string, const WCHAR *pattern) | |
1140 { | |
1141 ASSERT (pattern, (L"")); | |
1142 ASSERT (string, (L"")); | |
1143 ASSERT (string && pattern, (L"")); | |
1144 const WCHAR *start; | |
1145 | |
1146 for (start = string; *start != 0; start++) | |
1147 { | |
1148 // find start of pattern in string | |
1149 for ( ; ((*start!=0) && (String_ToUpper(*start) != String_ToUpper(*pattern))
); start++) | |
1150 ; | |
1151 if (0 == *start) | |
1152 return NULL; | |
1153 | |
1154 const WCHAR *pattern_ptr = pattern; | |
1155 const WCHAR *string_ptr = start; | |
1156 | |
1157 while (String_ToUpper(*string_ptr) == String_ToUpper(*pattern_ptr)) | |
1158 { | |
1159 string_ptr++; | |
1160 pattern_ptr++; | |
1161 | |
1162 // if end of pattern then pattern was found | |
1163 if (0 == *pattern_ptr) | |
1164 return (start); | |
1165 } | |
1166 } | |
1167 | |
1168 return NULL; | |
1169 } | |
1170 | |
1171 // case sensitive Unicode strstr | |
1172 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c | |
1173 const WCHAR *strstrW(const WCHAR *string, const WCHAR *pattern) | |
1174 { | |
1175 ASSERT (pattern, (L"")); | |
1176 ASSERT (string, (L"")); | |
1177 ASSERT (string && pattern, (L"")); | |
1178 const WCHAR *start; | |
1179 | |
1180 for (start = string; *start != 0; start++) | |
1181 { | |
1182 // find start of pattern in string | |
1183 for ( ; ((*start!=0) && (*start != *pattern)); start++) | |
1184 ; | |
1185 if (0 == *start) | |
1186 return NULL; | |
1187 | |
1188 const WCHAR *pattern_ptr = pattern; | |
1189 const WCHAR *string_ptr = start; | |
1190 | |
1191 while (*string_ptr == *pattern_ptr) | |
1192 { | |
1193 string_ptr++; | |
1194 pattern_ptr++; | |
1195 | |
1196 // if end of pattern then pattern was found | |
1197 if (0 == *pattern_ptr) | |
1198 return (start); | |
1199 } | |
1200 } | |
1201 | |
1202 return NULL; | |
1203 } | |
1204 | |
1205 // ------------------------------------------------------------------------- | |
1206 // Helper function | |
1207 float GetLenWithWordWrap (const float len_so_far, | |
1208 const float len_to_add, | |
1209 const uint32 len_line) { | |
1210 // lint -save -e414 Possible division by 0 | |
1211 ASSERT (len_line != 0, (L"")); | |
1212 | |
1213 float len_total = len_so_far + len_to_add; | |
1214 | |
1215 // Figure out if we need to word wrap by seeing if adding the second | |
1216 // string will cause us to span more lines than before. | |
1217 uint32 num_lines_before = static_cast<uint32> (len_so_far / len_line); | |
1218 uint32 num_lines_after = static_cast<uint32> (len_total / len_line); | |
1219 | |
1220 // If it just barely fit onto the line, do not wrap to the next line. | |
1221 if (num_lines_after > 0 && (len_total / len_line - num_lines_after == 0)) { | |
1222 --num_lines_after; | |
1223 } | |
1224 | |
1225 if (num_lines_after > num_lines_before) { | |
1226 // Need to word wrap. | |
1227 // lint -e{790} Suspicious truncation | |
1228 return num_lines_after * len_line + len_to_add; | |
1229 } | |
1230 else | |
1231 return len_total; | |
1232 | |
1233 // lint -restore | |
1234 } | |
1235 | |
1236 int CalculateBase64EscapedLen(int input_len, bool do_padding) { | |
1237 // these formulae were copied from comments that used to go with the base64 | |
1238 // encoding functions | |
1239 int intermediate_result = 8 * input_len + 5; | |
1240 ASSERT(intermediate_result > 0,(L"")); // make sure we didn't overflow | |
1241 int len = intermediate_result / 6; | |
1242 if (do_padding) len = ((len + 3) / 4) * 4; | |
1243 return len; | |
1244 } | |
1245 | |
1246 // Base64Escape does padding, so this calculation includes padding. | |
1247 int CalculateBase64EscapedLen(int input_len) { | |
1248 return CalculateBase64EscapedLen(input_len, true); | |
1249 } | |
1250 | |
1251 // Base64Escape | |
1252 // Largely based on b2a_base64 in google/docid_encryption.c | |
1253 // | |
1254 // | |
1255 int Base64EscapeInternal(const char *src, int szsrc, | |
1256 char *dest, int szdest, const char *base64, | |
1257 bool do_padding) | |
1258 { | |
1259 ASSERT(base64, (L"")); | |
1260 ASSERT(dest, (L"")); | |
1261 ASSERT(src, (L"")); | |
1262 | |
1263 static const char kPad64 = '='; | |
1264 | |
1265 if (szsrc <= 0) return 0; | |
1266 | |
1267 char *cur_dest = dest; | |
1268 const unsigned char *cur_src = reinterpret_cast<const unsigned char*>(src); | |
1269 | |
1270 // Three bytes of data encodes to four characters of cyphertext. | |
1271 // So we can pump through three-byte chunks atomically. | |
1272 while (szsrc > 2) { /* keep going until we have less than 24 bits */ | |
1273 if( (szdest -= 4) < 0 ) return 0; | |
1274 cur_dest[0] = base64[cur_src[0] >> 2]; | |
1275 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; | |
1276 cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)]; | |
1277 cur_dest[3] = base64[cur_src[2] & 0x3f]; | |
1278 | |
1279 cur_dest += 4; | |
1280 cur_src += 3; | |
1281 szsrc -= 3; | |
1282 } | |
1283 | |
1284 /* now deal with the tail (<=2 bytes) */ | |
1285 switch (szsrc) { | |
1286 case 0: | |
1287 // Nothing left; nothing more to do. | |
1288 break; | |
1289 case 1: | |
1290 // One byte left: this encodes to two characters, and (optionally) | |
1291 // two pad characters to round out the four-character cypherblock. | |
1292 if( (szdest -= 2) < 0 ) return 0; | |
1293 cur_dest[0] = base64[cur_src[0] >> 2]; | |
1294 cur_dest[1] = base64[(cur_src[0] & 0x03) << 4]; | |
1295 cur_dest += 2; | |
1296 if (do_padding) { | |
1297 if( (szdest -= 2) < 0 ) return 0; | |
1298 cur_dest[0] = kPad64; | |
1299 cur_dest[1] = kPad64; | |
1300 cur_dest += 2; | |
1301 } | |
1302 break; | |
1303 case 2: | |
1304 // Two bytes left: this encodes to three characters, and (optionally) | |
1305 // one pad character to round out the four-character cypherblock. | |
1306 if( (szdest -= 3) < 0 ) return 0; | |
1307 cur_dest[0] = base64[cur_src[0] >> 2]; | |
1308 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)]; | |
1309 cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2]; | |
1310 cur_dest += 3; | |
1311 if (do_padding) { | |
1312 if( (szdest -= 1) < 0 ) return 0; | |
1313 cur_dest[0] = kPad64; | |
1314 cur_dest += 1; | |
1315 } | |
1316 break; | |
1317 default: | |
1318 // Should not be reached: blocks of 3 bytes are handled | |
1319 // in the while loop before this switch statement. | |
1320 ASSERT(false, (L"Logic problem? szsrc = %S",szsrc)); | |
1321 break; | |
1322 } | |
1323 return (cur_dest - dest); | |
1324 } | |
1325 | |
// Standard Base64 alphabet. Base64EscapeInternal indexes into it with each
// 6-bit value, so the character at position i encodes the value i.
#define kBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// "Web safe" variant of the alphabet: identical except that values 62 and 63
// are written as '-' and '_' instead of '+' and '/'.
#define kWebSafeBase64Chars "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
1329 | |
1330 int Base64Escape(const char *src, int szsrc, char *dest, int szdest) { | |
1331 ASSERT(dest, (L"")); | |
1332 ASSERT(src, (L"")); | |
1333 | |
1334 return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true); | |
1335 } | |
1336 int WebSafeBase64Escape(const char *src, int szsrc, char *dest, | |
1337 int szdest, bool do_padding) { | |
1338 ASSERT(dest, (L"")); | |
1339 ASSERT(src, (L"")); | |
1340 | |
1341 return Base64EscapeInternal(src, szsrc, dest, szdest, | |
1342 kWebSafeBase64Chars, do_padding); | |
1343 } | |
1344 | |
1345 void Base64Escape(const char *src, int szsrc, | |
1346 CStringA* dest, bool do_padding) | |
1347 { | |
1348 ASSERT(src, (L"")); | |
1349 ASSERT(dest,(L"")); | |
1350 const int max_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding); | |
1351 dest->Empty(); | |
1352 const int escaped_len = Base64EscapeInternal(src, szsrc, | |
1353 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1, | |
1354 kBase64Chars, | |
1355 do_padding); | |
1356 ASSERT(max_escaped_size <= escaped_len,(L"")); | |
1357 dest->ReleaseBuffer(escaped_len); | |
1358 } | |
1359 | |
1360 void WebSafeBase64Escape(const char *src, int szsrc, | |
1361 CStringA *dest, bool do_padding) | |
1362 { | |
1363 ASSERT(src, (L"")); | |
1364 ASSERT(dest,(L"")); | |
1365 const int max_escaped_size = | |
1366 CalculateBase64EscapedLen(szsrc, do_padding); | |
1367 dest->Empty(); | |
1368 const int escaped_len = Base64EscapeInternal(src, szsrc, | |
1369 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1, | |
1370 kWebSafeBase64Chars, | |
1371 do_padding); | |
1372 ASSERT(max_escaped_size <= escaped_len,(L"")); | |
1373 dest->ReleaseBuffer(escaped_len); | |
1374 } | |
1375 | |
1376 void WebSafeBase64Escape(const CStringA& src, CStringA* dest) { | |
1377 ASSERT(dest,(L"")); | |
1378 int encoded_len = CalculateBase64EscapedLen(src.GetLength()); | |
1379 scoped_array<char> buf(new char[encoded_len]); | |
1380 int len = WebSafeBase64Escape(src,src.GetLength(), buf.get(), encoded_len, fal
se); | |
1381 dest->SetString(buf.get(), len); | |
1382 } | |
1383 | |
1384 // ---------------------------------------------------------------------- | |
1385 // int Base64Unescape() - base64 decoder | |
1386 // | |
1387 // Check out | |
1388 // http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for formal | |
1389 // description, but what we care about is that... | |
1390 // Take the encoded stuff in groups of 4 characters and turn each | |
1391 // character into a code 0 to 63 thus: | |
1392 // A-Z map to 0 to 25 | |
1393 // a-z map to 26 to 51 | |
1394 // 0-9 map to 52 to 61 | |
1395 // +(- for WebSafe) maps to 62 | |
1396 // /(_ for WebSafe) maps to 63 | |
1397 // There will be four numbers, all less than 64 which can be represented | |
1398 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). | |
1399 // Arrange the 6 digit binary numbers into three bytes as such: | |
1400 // aaaaaabb bbbbcccc ccdddddd | |
1401 // Equals signs (one or two) are used at the end of the encoded block to | |
1402 // indicate that the text was not an integer multiple of three bytes long. | |
1403 // ---------------------------------------------------------------------- | |
1404 int Base64UnescapeInternal(const char *src, int len_src, | |
1405 char *dest, int len_dest, const char* unbase64) { | |
1406 ASSERT (unbase64, (L"")); | |
1407 ASSERT (src, (L"")); | |
1408 | |
1409 static const char kPad64 = '='; | |
1410 | |
1411 int decode; | |
1412 int destidx = 0; | |
1413 int state = 0; | |
1414 // Used an unsigned char, since ch is used as an array index (into unbase64). | |
1415 unsigned char ch = 0; | |
1416 while (len_src-- && (ch = *src++) != '\0') { | |
1417 if (IsSpaceA(ch)) // Skip whitespace | |
1418 continue; | |
1419 | |
1420 if (ch == kPad64) | |
1421 break; | |
1422 | |
1423 decode = unbase64[ch]; | |
1424 if (decode == 99) // A non-base64 character | |
1425 return (-1); | |
1426 | |
1427 // Four cyphertext characters decode to three bytes. | |
1428 // Therefore we can be in one of four states. | |
1429 switch (state) { | |
1430 case 0: | |
1431 // We're at the beginning of a four-character cyphertext block. | |
1432 // This sets the high six bits of the first byte of the | |
1433 // plaintext block. | |
1434 if (dest) { | |
1435 if (destidx >= len_dest) | |
1436 return (-1); | |
1437 // lint -e{734} Loss of precision | |
1438 dest[destidx] = static_cast<char>(decode << 2); | |
1439 } | |
1440 state = 1; | |
1441 break; | |
1442 case 1: | |
1443 // We're one character into a four-character cyphertext block. | |
1444 // This sets the low two bits of the first plaintext byte, | |
1445 // and the high four bits of the second plaintext byte. | |
1446 // However, if this is the end of data, and those four | |
1447 // bits are zero, it could be that those four bits are | |
1448 // leftovers from the encoding of data that had a length | |
1449 // of one mod three. | |
1450 if (dest) { | |
1451 if (destidx >= len_dest) | |
1452 return (-1); | |
1453 // lint -e{734} Loss of precision | |
1454 dest[destidx] |= decode >> 4; | |
1455 if (destidx + 1 >= len_dest) { | |
1456 if (0 != (decode & 0x0f)) | |
1457 return (-1); | |
1458 else | |
1459 ; | |
1460 } else { | |
1461 // lint -e{734} Loss of precision | |
1462 dest[destidx+1] = static_cast<char>((decode & 0x0f) << 4); | |
1463 } | |
1464 } | |
1465 destidx++; | |
1466 state = 2; | |
1467 break; | |
1468 case 2: | |
1469 // We're two characters into a four-character cyphertext block. | |
1470 // This sets the low four bits of the second plaintext | |
1471 // byte, and the high two bits of the third plaintext byte. | |
1472 // However, if this is the end of data, and those two | |
1473 // bits are zero, it could be that those two bits are | |
1474 // leftovers from the encoding of data that had a length | |
1475 // of two mod three. | |
1476 if (dest) { | |
1477 if (destidx >= len_dest) | |
1478 return (-1); | |
1479 // lint -e{734} Loss of precision | |
1480 dest[destidx] |= decode >> 2; | |
1481 if (destidx +1 >= len_dest) { | |
1482 if (0 != (decode & 0x03)) | |
1483 return (-1); | |
1484 else | |
1485 ; | |
1486 } else { | |
1487 // lint -e{734} Loss of precision | |
1488 dest[destidx+1] = static_cast<char>((decode & 0x03) << 6); | |
1489 } | |
1490 } | |
1491 destidx++; | |
1492 state = 3; | |
1493 break; | |
1494 case 3: | |
1495 // We're at the last character of a four-character cyphertext block. | |
1496 // This sets the low six bits of the third plaintext byte. | |
1497 if (dest) { | |
1498 if (destidx >= len_dest) | |
1499 return (-1); | |
1500 // lint -e{734} Loss of precision | |
1501 dest[destidx] |= decode; | |
1502 } | |
1503 destidx++; | |
1504 state = 0; | |
1505 break; | |
1506 | |
1507 default: | |
1508 ASSERT (false, (L"")); | |
1509 break; | |
1510 } | |
1511 } | |
1512 | |
1513 // We are done decoding Base-64 chars. Let's see if we ended | |
1514 // on a byte boundary, and/or with erroneous trailing characters. | |
1515 if (ch == kPad64) { // We got a pad char | |
1516 if ((state == 0) || (state == 1)) | |
1517 return (-1); // Invalid '=' in first or second position | |
1518 if (len_src == 0) { | |
1519 if (state == 2) // We run out of input but we still need another '=' | |
1520 return (-1); | |
1521 // Otherwise, we are in state 3 and only need this '=' | |
1522 } else { | |
1523 if (state == 2) { // need another '=' | |
1524 while ((ch = *src++) != '\0' && (len_src-- > 0)) { | |
1525 if (!IsSpaceA(ch)) | |
1526 break; | |
1527 } | |
1528 if (ch != kPad64) | |
1529 return (-1); | |
1530 } | |
1531 // state = 1 or 2, check if all remain padding is space | |
1532 while ((ch = *src++) != '\0' && (len_src-- > 0)) { | |
1533 if (!IsSpaceA(ch)) | |
1534 return(-1); | |
1535 } | |
1536 } | |
1537 } else { | |
1538 // We ended by seeing the end of the string. Make sure we | |
1539 // have no partial bytes lying around. Note that we | |
1540 // do not require trailing '=', so states 2 and 3 are okay too. | |
1541 if (state == 1) | |
1542 return (-1); | |
1543 } | |
1544 | |
1545 return (destidx); | |
1546 } | |
1547 | |
1548 int Base64Unescape(const char *src, int len_src, char *dest, int len_dest) { | |
1549 ASSERT(dest, (L"")); | |
1550 ASSERT(src, (L"")); | |
1551 | |
1552 static const char UnBase64[] = { | |
1553 99, 99, 99, 99, 99, 99, 99, 99, | |
1554 99, 99, 99, 99, 99, 99, 99, 99, | |
1555 99, 99, 99, 99, 99, 99, 99, 99, | |
1556 99, 99, 99, 99, 99, 99, 99, 99, | |
1557 99, 99, 99, 99, 99, 99, 99, 99, | |
1558 99, 99, 99, 62/*+*/, 99, 99, 99, 63/*/ */, | |
1559 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, | |
1560 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99, | |
1561 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, | |
1562 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, | |
1563 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, | |
1564 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 99, | |
1565 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, | |
1566 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, | |
1567 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, | |
1568 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99, | |
1569 99, 99, 99, 99, 99, 99, 99, 99, | |
1570 99, 99, 99, 99, 99, 99, 99, 99, | |
1571 99, 99, 99, 99, 99, 99, 99, 99, | |
1572 99, 99, 99, 99, 99, 99, 99, 99, | |
1573 99, 99, 99, 99, 99, 99, 99, 99, | |
1574 99, 99, 99, 99, 99, 99, 99, 99, | |
1575 99, 99, 99, 99, 99, 99, 99, 99, | |
1576 99, 99, 99, 99, 99, 99, 99, 99, | |
1577 99, 99, 99, 99, 99, 99, 99, 99, | |
1578 99, 99, 99, 99, 99, 99, 99, 99, | |
1579 99, 99, 99, 99, 99, 99, 99, 99, | |
1580 99, 99, 99, 99, 99, 99, 99, 99, | |
1581 99, 99, 99, 99, 99, 99, 99, 99, | |
1582 99, 99, 99, 99, 99, 99, 99, 99, | |
1583 99, 99, 99, 99, 99, 99, 99, 99, | |
1584 99, 99, 99, 99, 99, 99, 99, 99 | |
1585 }; | |
1586 | |
1587 // The above array was generated by the following code | |
1588 // #include <sys/time.h> | |
1589 // #include <stdlib.h> | |
1590 // #include <string.h> | |
1591 // main() | |
1592 // { | |
1593 // static const char Base64[] = | |
1594 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | |
1595 // char *pos; | |
1596 // int idx, i, j; | |
1597 // printf(" "); | |
1598 // for (i = 0; i < 255; i += 8) { | |
1599 // for (j = i; j < i + 8; j++) { | |
1600 // pos = strchr(Base64, j); | |
1601 // if ((pos == NULL) || (j == 0)) | |
1602 // idx = 99; | |
1603 // else | |
1604 // idx = pos - Base64; | |
1605 // if (idx == 99) | |
1606 // printf(" %2d, ", idx); | |
1607 // else | |
1608 // printf(" %2d/*%c*/,", idx, j); | |
1609 // } | |
1610 // printf("\n "); | |
1611 // } | |
1612 // } | |
1613 | |
1614 return Base64UnescapeInternal(src, len_src, dest, len_dest, UnBase64); | |
1615 } | |
1616 | |
1617 int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) { | |
1618 ASSERT(dest, (L"")); | |
1619 ASSERT(src, (L"")); | |
1620 | |
1621 static const char UnBase64[] = { | |
1622 99, 99, 99, 99, 99, 99, 99, 99, | |
1623 99, 99, 99, 99, 99, 99, 99, 99, | |
1624 99, 99, 99, 99, 99, 99, 99, 99, | |
1625 99, 99, 99, 99, 99, 99, 99, 99, | |
1626 99, 99, 99, 99, 99, 99, 99, 99, | |
1627 99, 99, 99, 99, 99, 62/*-*/, 99, 99, | |
1628 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, | |
1629 60/*8*/, 61/*9*/, 99, 99, 99, 99, 99, 99, | |
1630 99, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, | |
1631 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, | |
1632 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, | |
1633 23/*X*/, 24/*Y*/, 25/*Z*/, 99, 99, 99, 99, 63/*_*/, | |
1634 99, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, | |
1635 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, | |
1636 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, | |
1637 49/*x*/, 50/*y*/, 51/*z*/, 99, 99, 99, 99, 99, | |
1638 99, 99, 99, 99, 99, 99, 99, 99, | |
1639 99, 99, 99, 99, 99, 99, 99, 99, | |
1640 99, 99, 99, 99, 99, 99, 99, 99, | |
1641 99, 99, 99, 99, 99, 99, 99, 99, | |
1642 99, 99, 99, 99, 99, 99, 99, 99, | |
1643 99, 99, 99, 99, 99, 99, 99, 99, | |
1644 99, 99, 99, 99, 99, 99, 99, 99, | |
1645 99, 99, 99, 99, 99, 99, 99, 99, | |
1646 99, 99, 99, 99, 99, 99, 99, 99, | |
1647 99, 99, 99, 99, 99, 99, 99, 99, | |
1648 99, 99, 99, 99, 99, 99, 99, 99, | |
1649 99, 99, 99, 99, 99, 99, 99, 99, | |
1650 99, 99, 99, 99, 99, 99, 99, 99, | |
1651 99, 99, 99, 99, 99, 99, 99, 99, | |
1652 99, 99, 99, 99, 99, 99, 99, 99, | |
1653 99, 99, 99, 99, 99, 99, 99, 99 | |
1654 }; | |
1655 // The above array was generated by the following code | |
1656 // #include <sys/time.h> | |
1657 // #include <stdlib.h> | |
1658 // #include <string.h> | |
1659 // main() | |
1660 // { | |
1661 // static const char Base64[] = | |
1662 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; | |
1663 // char *pos; | |
1664 // int idx, i, j; | |
1665 // printf(" "); | |
1666 // for (i = 0; i < 255; i += 8) { | |
1667 // for (j = i; j < i + 8; j++) { | |
1668 // pos = strchr(Base64, j); | |
1669 // if ((pos == NULL) || (j == 0)) | |
1670 // idx = 99; | |
1671 // else | |
1672 // idx = pos - Base64; | |
1673 // if (idx == 99) | |
1674 // printf(" %2d, ", idx); | |
1675 // else | |
1676 // printf(" %2d/*%c*/,", idx, j); | |
1677 // } | |
1678 // printf("\n "); | |
1679 // } | |
1680 // } | |
1681 | |
1682 return Base64UnescapeInternal(src, szsrc, dest, szdest, UnBase64); | |
1683 } | |
1684 | |
1685 bool IsHexDigit (WCHAR c) { | |
1686 return (((c >= L'a') && (c <= L'f')) | |
1687 || ((c >= L'A') && (c <= L'F')) | |
1688 || ((c >= L'0') && (c <= L'9'))); | |
1689 } | |
1690 | |
1691 int HexDigitToInt (WCHAR c) { | |
1692 return ((c >= L'a') ? ((c - L'a') + 10) : | |
1693 (c >= L'A') ? ((c - L'A') + 10) : | |
1694 (c - L'0')); | |
1695 } | |
1696 | |
1697 // ---------------------------------------------------------------------- | |
1698 // int QuotedPrintableUnescape() | |
1699 // | |
1700 // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for | |
1701 // more details, only briefly implemented. But from the web... | |
1702 // Quoted-printable is an encoding method defined in the MIME | |
1703 // standard. It is used primarily to encode 8-bit text (such as text | |
1704 // that includes foreign characters) into 7-bit US ASCII, creating a | |
1705 // document that is mostly readable by humans, even in its encoded | |
1706 // form. All MIME compliant applications can decode quoted-printable | |
1707 // text, though they may not necessarily be able to properly display the | |
1708 // document as it was originally intended. As quoted-printable encoding | |
1709 // is implemented most commonly, printable ASCII characters (values 33 | |
1710 // through 126, excluding 61), tabs and spaces that do not appear at the | |
1711 // end of lines, and end-of-line characters are not encoded. Other | |
1712 // characters are represented by an equal sign (=) immediately followed | |
1713 // by that character's hexadecimal value. Lines that are longer than 76 | |
1714 // characters are shortened by line breaks, with the equal sign marking | |
1715 // where the breaks occurred. | |
1716 // | |
1717 // Update: we really want QuotedPrintableUnescape to conform to rfc2047, | |
1718 // which expands the q encoding. In particular, it specifices that _'s are | |
1719 // to be treated as spaces. | |
1720 // ---------------------------------------------------------------------- | |
1721 int QuotedPrintableUnescape(const WCHAR *source, int slen, | |
1722 WCHAR *dest, int len_dest) { | |
1723 ASSERT(dest, (L"")); | |
1724 ASSERT(source, (L"")); | |
1725 | |
1726 WCHAR* d = dest; | |
1727 const WCHAR* p = source; | |
1728 | |
1729 while (*p != '\0' && p < source+slen && d < dest+len_dest) { | |
1730 switch (*p) { | |
1731 case '=': | |
1732 if (p == source+slen-1) { | |
1733 // End of line, no need to print the =.. | |
1734 return (d-dest); | |
1735 } | |
1736 // if its valid, convert to hex and insert | |
1737 if (p < source+slen-2 && IsHexDigit(p[1]) && IsHexDigit(p[2])) { | |
1738 // lint -e{734} Loss of precision | |
1739 *d++ = static_cast<WCHAR>( | |
1740 HexDigitToInt(p[1]) * 16 + HexDigitToInt(p[2])); | |
1741 p += 3; | |
1742 } else { | |
1743 p++; | |
1744 } | |
1745 break; | |
1746 case '_': // According to rfc2047, _'s are to be treated as spaces | |
1747 *d++ = ' '; p++; | |
1748 break; | |
1749 default: | |
1750 *d++ = *p++; | |
1751 break; | |
1752 } | |
1753 } | |
1754 return (d-dest); | |
1755 } | |
1756 | |
1757 // TODO(omaha): currently set not to use IsCharUpper because that is relatively
slow | |
1758 // this is used in the QUIB; consider if we need to use IsCharUpper or a replace
ment | |
1759 bool String_IsUpper(TCHAR c) { | |
1760 return (c >= 'A' && c <= 'Z'); | |
1761 // return (IsCharUpper (c)); | |
1762 } | |
1763 | |
1764 // Replacement for the CRT toupper(c) | |
1765 int String_ToUpper(int c) { | |
1766 // If it's < 128, then convert is ourself, which is far cheaper than the syste
m conversion | |
1767 if (c < 128) | |
1768 return String_ToUpperA(static_cast<char>(c)); | |
1769 | |
1770 TCHAR * p_c = reinterpret_cast<TCHAR *>(c); | |
1771 int conv_c = reinterpret_cast<int>(::CharUpper(p_c)); | |
1772 return conv_c; | |
1773 } | |
1774 | |
// ASCII-only replacement for the CRT toupper(c): maps 'a'..'z' onto
// 'A'..'Z' and returns every other character unchanged.
char String_ToUpperA(char c) {
  const bool is_ascii_lower = ('a' <= c) && (c <= 'z');
  if (!is_ascii_lower)
    return c;
  return c - ('a' - 'A');
}
1780 | |
1781 void String_ToLower(TCHAR* str) { | |
1782 ASSERT1(str); | |
1783 ::CharLower(str); | |
1784 } | |
1785 | |
1786 void String_ToUpper(TCHAR* str) { | |
1787 ASSERT1(str); | |
1788 ::CharUpper(str); | |
1789 } | |
1790 | |
1791 // String comparison based on length | |
1792 // Replacement for the CRT strncmp(i) | |
1793 int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool igno
re_case) { | |
1794 ASSERT(str2, (L"")); | |
1795 ASSERT(str1, (L"")); | |
1796 | |
1797 TCHAR c1, c2; | |
1798 | |
1799 if (len == 0) | |
1800 return 0; | |
1801 | |
1802 // compare each char | |
1803 // TODO(omaha): If we use a lot of case sensitive compares consider having 2 l
oops. | |
1804 do { | |
1805 c1 = *str1++; | |
1806 c2 = *str2++; | |
1807 if (ignore_case) { | |
1808 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun
cation | |
1809 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 | |
1810 } | |
1811 } while ( (--len) && c1 && (c1 == c2) ); | |
1812 | |
1813 return (int)(c1 - c2); | |
1814 } | |
1815 | |
1816 // TODO(omaha): Why do we introduce this behaviorial difference? | |
1817 // Replacement for strncpy() - except ALWAYS ends string with null | |
1818 TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len) { | |
1819 ASSERT (source, (L"")); | |
1820 ASSERT (destination, (L"")); | |
1821 | |
1822 TCHAR* result = destination; | |
1823 | |
1824 ASSERT (0 != len, (L"")); // Too short a destination for even the null cha
racter | |
1825 | |
1826 while (*source && len) { | |
1827 *destination++ = *source++; | |
1828 len--; | |
1829 } | |
1830 | |
1831 // If we ran out of space, back up one | |
1832 if (0 == len) { | |
1833 destination--; | |
1834 } | |
1835 | |
1836 // Null-terminate the string | |
1837 *destination = _T('\0'); | |
1838 | |
1839 return result; | |
1840 } | |
1841 | |
1842 // check if a string starts with another string | |
1843 bool String_StartsWith(const TCHAR *str, const TCHAR *start_str, | |
1844 bool ignore_case) { | |
1845 ASSERT(start_str, (L"")); | |
1846 ASSERT(str, (L"")); | |
1847 | |
1848 while (0 != *str) { | |
1849 // Check for matching characters | |
1850 TCHAR c1 = *str; | |
1851 TCHAR c2 = *start_str; | |
1852 | |
1853 // Reached the end of start_str? | |
1854 if (0 == c2) | |
1855 return true; | |
1856 | |
1857 if (ignore_case) { | |
1858 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun
cation | |
1859 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 Suspicious trun
cation | |
1860 } | |
1861 | |
1862 if (c1 != c2) | |
1863 return false; | |
1864 | |
1865 ++str; | |
1866 ++start_str; | |
1867 } | |
1868 | |
1869 // If str is shorter than start_str, no match. If equal size, match. | |
1870 return 0 == *start_str; | |
1871 } | |
1872 | |
1873 // check if a string starts with another string | |
1874 bool String_StartsWithA(const char *str, const char *start_str, bool ignore_case
) { | |
1875 ASSERT(start_str, (L"")); | |
1876 ASSERT(str, (L"")); | |
1877 | |
1878 while (0 != *str) { | |
1879 // Check for matching characters | |
1880 char c1 = *str; | |
1881 char c2 = *start_str; | |
1882 | |
1883 // Reached the end of start_str? | |
1884 if (0 == c2) | |
1885 return true; | |
1886 | |
1887 if (ignore_case) { | |
1888 c1 = String_ToLowerCharAnsi(c1); | |
1889 c2 = String_ToLowerCharAnsi(c2); | |
1890 } | |
1891 | |
1892 if (c1 != c2) | |
1893 return false; | |
1894 | |
1895 ++str; | |
1896 ++start_str; | |
1897 } | |
1898 | |
1899 // If str is shorter than start_str, no match. If equal size, match. | |
1900 return 0 == *start_str; | |
1901 } | |
1902 | |
1903 // the wrapper version below actually increased code size as of 5/31/04 | |
1904 // perhaps because the int64 version is larger and in some EXE/DLLs we only need
the int32 version | |
1905 | |
1906 // converts a string to an int | |
1907 // Does not check for overflow | |
1908 // is the direct int32 version significantly faster for our usage? | |
1909 // int32 String_StringToInt(const TCHAR * str) { | |
1910 // ASSERT(str, (L"")); | |
1911 // return static_cast<int32>(String_StringToInt64 (str)); | |
1912 // } | |
1913 | |
1914 // converts a string to an int | |
1915 // Does not check for overflow | |
1916 int32 String_StringToInt(const TCHAR * str) { | |
1917 ASSERT(str, (L"")); | |
1918 | |
1919 int c; // current char | |
1920 int32 total; // current total | |
1921 int sign; // if '-', then negative, otherwise positive | |
1922 | |
1923 // remove spaces | |
1924 while ( *str == _T(' ')) | |
1925 ++str; | |
1926 | |
1927 c = (int)*str++; | |
1928 sign = c; // save sign indication | |
1929 if (c == _T('-') || c == _T('+')) | |
1930 c = (int)*str++; // skip sign | |
1931 | |
1932 total = 0; | |
1933 | |
1934 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1 ) { | |
1935 total = 10 * total + c; // accumulate digit | |
1936 c = *str++; // get next char | |
1937 } | |
1938 | |
1939 if (sign == '-') | |
1940 return -total; | |
1941 else | |
1942 return total; // return result, negated if necessary | |
1943 } | |
1944 | |
1945 // converts a string to an int64 | |
1946 // Does not check for overflow | |
1947 int64 String_StringToInt64(const TCHAR * str) { | |
1948 ASSERT(str, (L"")); | |
1949 | |
1950 int c; // current char | |
1951 int64 total; // current total | |
1952 int sign; | |
1953 | |
1954 while (*str == ' ') ++str; // skip space | |
1955 | |
1956 c = (int)*str++; | |
1957 sign = c; /* save sign indication */ | |
1958 if (c == '-' || c == '+') | |
1959 c = (int)*str++; | |
1960 | |
1961 total = 0; | |
1962 | |
1963 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1) { | |
1964 total = 10 * total + c; /* accumulate digit */ | |
1965 c = *str++; /* get next char */ | |
1966 } | |
1967 | |
1968 if (sign == '-') | |
1969 return -total; | |
1970 else | |
1971 return total; | |
1972 } | |
1973 | |
1974 // A faster version of the ::CharLower command. We first check if all characters
are in low ANSI | |
1975 // If so, we can convert it ourselves [which is about 10x faster] | |
1976 // Otherwise, ask the system to do it for us. | |
1977 TCHAR * String_FastToLower(TCHAR * str) { | |
1978 ASSERT(str, (L"")); | |
1979 | |
1980 TCHAR * p = str; | |
1981 while (*p) { | |
1982 // If we can't process it ourselves, then do it with the API | |
1983 if (*p > 127) | |
1984 return ::CharLower(str); | |
1985 ++p; | |
1986 } | |
1987 | |
1988 // If we're still here, do it ourselves | |
1989 p = str; | |
1990 while (*p) { | |
1991 // Lower case it | |
1992 if (*p >= L'A' && *p <= 'Z') | |
1993 *p |= 0x20; | |
1994 ++p; | |
1995 } | |
1996 | |
1997 return str; | |
1998 } | |
1999 | |
2000 // Convert a size_t to a CString | |
2001 CString sizet_to_str(const size_t & i) { | |
2002 CString out; | |
2003 out.Format(NOTRANSL(_T("%u")),i); | |
2004 return out; | |
2005 } | |
2006 | |
2007 // Convert an int to a CString | |
2008 CString itostr(const int i) { | |
2009 return String_Int64ToString(i, 10); | |
2010 } | |
2011 | |
2012 // Convert a uint to a CString | |
2013 CString itostr(const uint32 i) { | |
2014 return String_Int64ToString(i, 10); | |
2015 } | |
2016 | |
2017 // converts an int to a string | |
2018 // Does not check for overflow | |
2019 CString String_Int64ToString(int64 value, int radix) { | |
2020 ASSERT(radix > 0, (L"")); | |
2021 | |
2022 // Space big enough for it in binary, plus the sign | |
2023 TCHAR temp[66]; | |
2024 | |
2025 bool negative = false; | |
2026 if (value < 0) { | |
2027 negative = true; | |
2028 value = -value; | |
2029 } | |
2030 | |
2031 int pos = 0; | |
2032 | |
2033 // Add digits in reverse order | |
2034 do { | |
2035 TCHAR digit = (TCHAR) (value % radix); | |
2036 if (digit > 9) | |
2037 temp[pos] = L'a' + digit - 10; | |
2038 else | |
2039 temp[pos] = L'0' + digit; | |
2040 | |
2041 pos++; | |
2042 value /= radix; | |
2043 } while (value > 0); | |
2044 | |
2045 if (negative) | |
2046 temp[pos++] = L'-'; | |
2047 | |
2048 // Reverse it before making a CString out of it | |
2049 int start = 0, end = pos - 1; | |
2050 while (start < end) { | |
2051 TCHAR t = temp[start]; | |
2052 temp[start] = temp[end]; | |
2053 temp[end] = t; | |
2054 | |
2055 end--; | |
2056 start++; | |
2057 } | |
2058 | |
2059 return CString(temp, pos); | |
2060 } | |
2061 | |
2062 // converts an uint64 to a string | |
2063 // Does not check for overflow | |
2064 CString String_Uint64ToString(uint64 value, int radix) { | |
2065 ASSERT1(radix > 0); | |
2066 | |
2067 CString ret; | |
2068 | |
2069 const uint32 kMaxUint64Digits = 65; | |
2070 | |
2071 // Space big enough for it in binary | |
2072 TCHAR* temp = ret.GetBufferSetLength(kMaxUint64Digits); | |
2073 | |
2074 int pos = 0; | |
2075 | |
2076 // Add digits in reverse order | |
2077 do { | |
2078 TCHAR digit = static_cast<TCHAR>(value % radix); | |
2079 if (digit > 9) { | |
2080 temp[pos] = _T('a') + digit - 10; | |
2081 } else { | |
2082 temp[pos] = _T('0') + digit; | |
2083 } | |
2084 | |
2085 pos++; | |
2086 value /= radix; | |
2087 } while (value > 0 && pos < kMaxUint64Digits); | |
2088 | |
2089 ret.ReleaseBuffer(pos); | |
2090 | |
2091 // Reverse it before making a CString out of it | |
2092 ret.MakeReverse(); | |
2093 | |
2094 return ret; | |
2095 } | |
2096 | |
2097 // converts an double to a string specifies the number of digits after | |
2098 // the decimal point | |
2099 CString String_DoubleToString(double value, int point_digits) { | |
2100 int64 int_val = (int64) value; | |
2101 | |
2102 // Deal with integer part | |
2103 CString result(String_Int64ToString(int_val, 10)); | |
2104 | |
2105 if (point_digits > 0) { | |
2106 result.AppendChar(L'.'); | |
2107 | |
2108 // get the fp digits | |
2109 double rem_val = value - int_val; | |
2110 if (rem_val < 0) | |
2111 rem_val = -rem_val; | |
2112 | |
2113 // multiply w/ the requested number of significant digits | |
2114 // construct the string in place | |
2115 for(int i=0; i<point_digits; i++) { | |
2116 // TODO(omaha): I have seen 1.2 turn into 1.1999999999999, and generate th
at string. | |
2117 // We should round better. For now, I'll add a quick fix to favor high | |
2118 rem_val += 1e-12; | |
2119 rem_val *= 10; | |
2120 // Get the ones digit | |
2121 int64 int_rem_dig = std::min(10LL, static_cast<int64>(rem_val)); | |
2122 result += static_cast<TCHAR>(int_rem_dig + L'0'); | |
2123 rem_val = rem_val - int_rem_dig; | |
2124 } | |
2125 } | |
2126 | |
2127 return result; | |
2128 } | |
2129 | |
2130 double String_StringToDouble (const TCHAR *s) { | |
2131 ASSERT(s, (L"")); | |
2132 | |
2133 double value, power; | |
2134 int i = 0, sign; | |
2135 | |
2136 while (IsSpaceW(s[i])) i++; | |
2137 | |
2138 // get sign | |
2139 sign = (s[i] == '-') ? -1 : 1; | |
2140 if (s[i] == '+' || s[i] == '-') i++; | |
2141 | |
2142 for (value = 0.0; s[i] >= '0' && s[i] <= '9'; i++) | |
2143 value = 10.0 * value + (s[i] - '0'); | |
2144 | |
2145 if (s[i] == '.') i++; | |
2146 | |
2147 for (power = 1.0; s[i] >= '0' && s[i] <= '9'; i++) { | |
2148 value = 10.0 * value + (s[i] - '0'); | |
2149 power *= 10.0; | |
2150 } | |
2151 | |
2152 return sign * value / power; | |
2153 } | |
2154 | |
2155 // Converts a character to a digit | |
2156 // if the character is not a digit return -1 (same as CRT) | |
2157 int32 String_CharToDigit(const TCHAR c) { | |
2158 return ((c) >= '0' && (c) <= '9' ? (c) - '0' : -1); | |
2159 } | |
2160 | |
2161 bool String_IsDigit (const TCHAR c) { | |
2162 return ((c) >= '0' && (c) <= '9'); | |
2163 } | |
2164 | |
2165 TCHAR String_DigitToChar(unsigned int n) { | |
2166 ASSERT1(n < 10); | |
2167 return static_cast<TCHAR>(_T('0') + n % 10); | |
2168 } | |
2169 | |
2170 // Returns true if an identifier character: letter, digit, or "_" | |
2171 bool String_IsIdentifierChar(const TCHAR c) { | |
2172 return ((c >= _T('A') && c <= _T('Z')) || | |
2173 (c >= _T('a') && c <= _T('z')) || | |
2174 (c >= _T('0') && c <= _T('9')) || | |
2175 c == _T('_')); | |
2176 } | |
2177 | |
2178 // Returns true if the string has letters in it. | |
2179 // This is used by the keyword extractor to downweight numbers, | |
2180 // IDs (sequences of numbers like social security numbers), etc. | |
2181 bool String_HasAlphabetLetters (const TCHAR * str) { | |
2182 ASSERT (str, (L"")); | |
2183 | |
2184 while (*str != '\0') { | |
2185 // if (iswalpha (*str)) { | |
2186 // Note that IsCharAlpha is slower but we want to avoid the CRT | |
2187 if (IsCharAlpha (*str)) { | |
2188 return true; | |
2189 } | |
2190 ++str; | |
2191 } | |
2192 | |
2193 return false; | |
2194 } | |
2195 | |
2196 CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* pow
er) { | |
2197 uint32 to_one_decimal; | |
2198 | |
2199 uint32 gig = base_ten ? 1000000000 : (1<<30); | |
2200 uint32 gig_div_10 = base_ten ? 100000000 : (1<<30)/10; | |
2201 uint32 meg = base_ten ? 1000000 : (1<<20); | |
2202 uint32 meg_div_10 = base_ten ? 100000 : (1<<20)/10; | |
2203 uint32 kilo = base_ten ? 1000 : (1<<10); | |
2204 uint32 kilo_div_10 = base_ten ? 100 : (1<<10)/10; | |
2205 | |
2206 if (value >= gig) { | |
2207 if (power) *power = 3; | |
2208 to_one_decimal = static_cast<uint32>(value / gig_div_10); | |
2209 } else if (value >= meg) { | |
2210 if (power) *power = 2; | |
2211 to_one_decimal = static_cast<uint32>(value / meg_div_10); | |
2212 } else if (value >= kilo) { | |
2213 if (power) *power = 1; | |
2214 to_one_decimal = static_cast<uint32>(value / kilo_div_10); | |
2215 } else { | |
2216 if (power) *power = 0; | |
2217 return String_Int64ToString(static_cast<uint32>(value), 10 /*radix*/); | |
2218 } | |
2219 | |
2220 uint32 whole_part = to_one_decimal / 10; | |
2221 | |
2222 if (whole_part < 10) | |
2223 return Show(0.1 * static_cast<double>(to_one_decimal), 1); | |
2224 | |
2225 return String_Int64ToString(whole_part, 10 /*radix*/); | |
2226 } | |
2227 | |
2228 int String_FindString(const TCHAR *s1, const TCHAR *s2) { | |
2229 ASSERT(s2, (L"")); | |
2230 ASSERT(s1, (L"")); | |
2231 | |
2232 // Naive implementation, but still oodles better than ATL's implementation | |
2233 // (which deals with variable character widths---we don't). | |
2234 | |
2235 const TCHAR *found = _tcsstr(s1, s2); | |
2236 if (NULL == found) | |
2237 return -1; | |
2238 | |
2239 return found - s1; | |
2240 } | |
2241 | |
2242 int String_FindString(const TCHAR *s1, const TCHAR *s2, int start_pos) { | |
2243 ASSERT(s2, (L"")); | |
2244 ASSERT(s1, (L"")); | |
2245 | |
2246 // Naive implementation, but still oodles better than ATL's implementation | |
2247 // (which deals with variable character widths---we don't). | |
2248 | |
2249 int skip = start_pos; | |
2250 | |
2251 const TCHAR *s = s1; | |
2252 while (skip && *s) { | |
2253 ++s; | |
2254 --skip; | |
2255 } | |
2256 if (!(*s)) | |
2257 return -1; | |
2258 | |
2259 const TCHAR *found = _tcsstr(s, s2); | |
2260 if (NULL == found) | |
2261 return -1; | |
2262 | |
2263 return found - s1; | |
2264 } | |
2265 | |
2266 int String_FindChar(const TCHAR *str, const TCHAR c) { | |
2267 ASSERT (str, (L"")); | |
2268 const TCHAR *s = str; | |
2269 while (*s) { | |
2270 if (*s == c) | |
2271 return s - str; | |
2272 ++s; | |
2273 } | |
2274 | |
2275 return -1; | |
2276 } | |
2277 | |
2278 // taken from wcsrchr, modified to behave in the CString way | |
2279 int String_ReverseFindChar(const TCHAR * str,TCHAR c) { | |
2280 ASSERT (str, (L"")); | |
2281 TCHAR *start = (TCHAR *)str; | |
2282 | |
2283 while (*str++) /* find end of string */ | |
2284 ; | |
2285 /* search towards front */ | |
2286 while (--str != start && *str != (TCHAR)c) | |
2287 ; | |
2288 | |
2289 if (*str == (TCHAR)c) /* found ? */ | |
2290 return( str - start ); | |
2291 | |
2292 return -1; | |
2293 } | |
2294 | |
2295 int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos) { | |
2296 ASSERT (str, (L"")); | |
2297 int n = 0; | |
2298 const TCHAR *s = str; | |
2299 while (*s) { | |
2300 if (n++ >= start_pos && *s == c) | |
2301 return s - str; | |
2302 ++s; | |
2303 } | |
2304 | |
2305 return -1; | |
2306 } | |
2307 | |
2308 bool String_Contains(const TCHAR *s1, const TCHAR *s2) { | |
2309 ASSERT(s2, (L"")); | |
2310 ASSERT(s1, (L"")); | |
2311 | |
2312 return -1 != String_FindString(s1, s2); | |
2313 } | |
2314 | |
2315 void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char) { | |
2316 ASSERT (str, (L"")); | |
2317 while (*str) { | |
2318 if (*str == old_char) | |
2319 *str = new_char; | |
2320 | |
2321 ++str; | |
2322 } | |
2323 } | |
2324 | |
2325 void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char) { | |
2326 String_ReplaceChar (str.GetBuffer(), old_char, new_char); | |
2327 str.ReleaseBuffer(); | |
2328 } | |
2329 | |
2330 int ReplaceCString (CString & src, const TCHAR *from, const TCHAR *to) { | |
2331 ASSERT(to, (L"")); | |
2332 ASSERT(from, (L"")); | |
2333 | |
2334 return ReplaceCString(src, from, lstrlen(from), to, lstrlen(to), kRepMax); | |
2335 } | |
2336 | |
2337 // A special version of the replace function which takes advantage of CString pr
operties | |
2338 // to make it much faster when the string grows | |
2339 // 1) It will resize the string in place if possible. Even if it has to 'grow' t
he string | |
2340 // 2) It will cutoff after a maximum number of matches | |
2341 // 3) It expects sizing data to be passed to it | |
2342 int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len, | |
2343 const TCHAR *to, unsigned int to_len, | |
2344 unsigned int max_matches) { | |
2345 ASSERT (from, (L"")); | |
2346 ASSERT (to, (L"")); | |
2347 ASSERT (from[0] != '\0', (L"")); | |
2348 int i = 0, j = 0; | |
2349 unsigned int matches = 0; | |
2350 | |
2351 // Keep track of the matches, it's easier than recalculating them | |
2352 unsigned int match_pos_stack[kExpectedMaxReplaceMatches]; | |
2353 | |
2354 // We might need to dynamically allocate space for the matches | |
2355 bool dynamic_allocate = false; | |
2356 unsigned int * match_pos = (unsigned int*)match_pos_stack; | |
2357 unsigned int max_match_size = kExpectedMaxReplaceMatches; | |
2358 | |
2359 // Is the string getting bigger? | |
2360 bool longer = to_len > from_len; | |
2361 | |
2362 // don't compute the lengths unless we know we need to | |
2363 int src_len = src.GetLength(); | |
2364 int cur_len = src_len; | |
2365 | |
2366 // Trick: We temporarily add 1 extra character to the string. The first char f
rom the from | |
2367 // string. This way we can avoid searching for NULL, since we are guaranteed t
o find it | |
2368 TCHAR * buffer = src.GetBufferSetLength(src_len+1); | |
2369 const TCHAR from_0 = from[0]; | |
2370 buffer[src_len] = from[0]; | |
2371 | |
2372 while (i < cur_len) { | |
2373 // If we have too many matches, then re-allocate to a dynamic buffer that is | |
2374 // twice as big as the one we are currently using | |
2375 if (longer && (matches == max_match_size)) { | |
2376 // Double the buffer size, and copy it over | |
2377 unsigned int * temp = new unsigned int[max_match_size * 2]; | |
2378 memcpy(temp, match_pos, matches * sizeof(unsigned int)); | |
2379 if (dynamic_allocate) | |
2380 delete [] match_pos; // lint !e424 Inappropriate deallocation | |
2381 match_pos = temp; | |
2382 | |
2383 max_match_size *= 2; | |
2384 dynamic_allocate = true; | |
2385 } | |
2386 | |
2387 // If we have the maximum number of matches already, then stop | |
2388 if (matches >= max_matches) { | |
2389 break; | |
2390 } | |
2391 | |
2392 // For each potential match | |
2393 // Note: oddly enough, this is the most expensive line in the function under
normal usage. So I am optimizing the heck out of it | |
2394 TCHAR * buf_ptr = buffer + i; | |
2395 while (*buf_ptr != from_0) { ++buf_ptr; } | |
2396 i = buf_ptr - buffer; | |
2397 | |
2398 // We're done! | |
2399 if (i >= cur_len) | |
2400 break; | |
2401 | |
2402 // buffer is not NULL terminated, we replaced the NULL above | |
2403 while (i < cur_len && buffer[i] && buffer[i] == from[j]) { | |
2404 ++i; ++j; | |
2405 if (from[j] == '\0') { // found match | |
2406 | |
2407 if (!longer) { // modify in place | |
2408 | |
2409 memcpy ((byte *)(buffer+i) - (sizeof (TCHAR) * from_len), (byte *)to,
sizeof (TCHAR) * to_len); | |
2410 // if there are often a lot of replacements, it would be faster to cre
ate a new string instead | |
2411 // of using memmove | |
2412 | |
2413 // TODO(omaha): - memmove will cause n^2 behavior in strings with mult
iple matches since it will be moved many times... | |
2414 if (to_len < from_len) { memmove ((byte *)(buffer+i) - (sizeof (TCHAR)
* (from_len - to_len)), | |
2415 (byte *)(buffer+i), (src_len - i + 1)
* sizeof (TCHAR)); } | |
2416 | |
2417 i -= (from_len - to_len); | |
2418 cur_len -= (from_len - to_len); | |
2419 } | |
2420 else | |
2421 match_pos[matches] = i - from_len; | |
2422 | |
2423 ++matches; | |
2424 | |
2425 break; | |
2426 } | |
2427 } | |
2428 | |
2429 j = 0; | |
2430 } | |
2431 | |
2432 if (to_len <= from_len) | |
2433 src_len -= matches * (from_len - to_len); | |
2434 | |
2435 // if the new string is longer we do another pass now that we know how long th
e new string needs to be | |
2436 if (matches && to_len > from_len) { | |
2437 src.ReleaseBuffer(src_len); | |
2438 | |
2439 int new_len = src_len + matches * (to_len - from_len); | |
2440 buffer = src.GetBufferSetLength(new_len); | |
2441 | |
2442 // It's easier to assemble it backwards... | |
2443 int temp_end = new_len; | |
2444 for(i = matches-1; i >= 0; --i) { | |
2445 // Figure out where the trailing portion isthe trailing portion | |
2446 int len = src_len - match_pos[i] - from_len; | |
2447 int start = match_pos[i] + from_len; | |
2448 int dest = temp_end - len; | |
2449 memmove(buffer+dest, buffer+start, (len) * sizeof(TCHAR)); | |
2450 | |
2451 // copy the new item | |
2452 memcpy(buffer + dest - to_len, to, to_len * sizeof(TCHAR)); | |
2453 | |
2454 // Update the pointers | |
2455 temp_end = dest - to_len; | |
2456 src_len = match_pos[i]; | |
2457 | |
2458 } | |
2459 src_len = new_len; | |
2460 } | |
2461 | |
2462 src.ReleaseBuffer(src_len); | |
2463 if (dynamic_allocate) | |
2464 delete [] match_pos; // lint !e673 Possibly inappropriate deallocation | |
2465 | |
2466 return matches; | |
2467 } | |
2468 | |
2469 /* | |
2470 The following 2 functions will do replacement on TCHAR* directly. They are currently unused. | |
2471 Feel free to put it back if you need to. | |
2472 */ | |
2473 int ReplaceString (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out,
int *out_len) { | |
2474 ASSERT(out_len, (L"")); | |
2475 ASSERT(out, (L"")); | |
2476 ASSERT(to, (L"")); | |
2477 ASSERT(from, (L"")); | |
2478 ASSERT(src, (L"")); | |
2479 | |
2480 bool created_new_string; | |
2481 int matches = ReplaceStringMaybeInPlace (src, from, to, out, out_len, &created
_new_string); | |
2482 if (!created_new_string) { | |
2483 *out = new TCHAR [(*out_len)+1]; | |
2484 if (!(*out)) { *out = src; return 0; } | |
2485 _tcscpy_s(*out, *out_len + 1, src); | |
2486 } | |
2487 | |
2488 return matches; | |
2489 } | |
2490 | |
2491 int ReplaceStringMaybeInPlace (TCHAR *src, const TCHAR *from, const TCHAR *to, T
CHAR **out, int *out_len, bool *created_new_string) { | |
2492 ASSERT (created_new_string, (L"")); | |
2493 ASSERT (out_len, (L"")); | |
2494 ASSERT (src, (L"")); | |
2495 ASSERT (from, (L"")); | |
2496 ASSERT (to, (L"")); | |
2497 ASSERT (out, (L"")); | |
2498 ASSERT (from[0] != '\0', (L"")); | |
2499 int i = 0, j = 0; | |
2500 int matches = 0; | |
2501 | |
2502 // don't compute the lengths unless we know we need to | |
2503 int from_len = -1, to_len = -1, src_len = -1; | |
2504 | |
2505 *created_new_string = false; | |
2506 *out = src; | |
2507 | |
2508 while (src[i]) { | |
2509 while (src[i] && src[i] != from[0]) { i++; } | |
2510 while (src[i] && src[i] == from[j]) { | |
2511 i++; j++; | |
2512 if (from[j] == '\0') { // found match | |
2513 if (from_len == -1) { // compute lengths if not known | |
2514 from_len = lstrlen (from); | |
2515 to_len = lstrlen (to); | |
2516 src_len = lstrlen (src); | |
2517 } | |
2518 | |
2519 matches++; | |
2520 | |
2521 if (to_len <= from_len) { // modify in place | |
2522 memcpy ((byte *)(src+i) - (sizeof (TCHAR) * from_len), (byte *)to, siz
eof (TCHAR) * to_len); | |
2523 // if there are often a lot of replacements, it would be faster to cre
ate a new string instead | |
2524 // of using memmove | |
2525 if (to_len < from_len) { memmove ((byte *)(src+i) - (sizeof (TCHAR) *
(from_len - to_len)), | |
2526 (byte *)(src+i), (src_len - i + 1) *
sizeof (TCHAR)); } | |
2527 i -= (from_len - to_len); | |
2528 } | |
2529 | |
2530 break; | |
2531 } | |
2532 } | |
2533 | |
2534 j = 0; | |
2535 } | |
2536 | |
2537 *out_len = i; | |
2538 | |
2539 // if the new string is longer we do another pass now that we know how long th
e new string needs to be | |
2540 if (matches && to_len > from_len) { | |
2541 ASSERT (src_len == i, (L"")); | |
2542 int new_len = src_len + matches * (to_len - from_len); | |
2543 *out = new TCHAR [new_len+1]; | |
2544 if (!(*out)) { *out = src; *out_len = lstrlen (src); return 0; } | |
2545 *created_new_string = true; | |
2546 i = 0; j = 0; int k = 0; | |
2547 | |
2548 while (src[i]) { | |
2549 while (src[i] && src[i] != from[0]) { | |
2550 (*out)[k++] = src[i++]; | |
2551 } | |
2552 while (src[i] && src[i] == from[j]) { | |
2553 (*out)[k++] = src[i++]; | |
2554 j++; | |
2555 | |
2556 if (from[j] == '\0') { // found match | |
2557 k -= from_len; | |
2558 ASSERT (k >= 0, (L"")); | |
2559 memcpy ((byte *)((*out)+k), (byte *)to, sizeof (TCHAR) * to_le
n); | |
2560 k += to_len; | |
2561 break; | |
2562 } | |
2563 } | |
2564 | |
2565 j = 0; | |
2566 } | |
2567 | |
2568 (*out)[k] = '\0'; | |
2569 ASSERT (k == new_len, (L"")); | |
2570 *out_len = new_len; | |
2571 } | |
2572 | |
2573 return matches; | |
2574 } | |
2575 | |
2576 /**************************************************************************** | |
2577 * wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed in
t. | |
2578 * | |
2579 * modified from: | |
2580 * | |
2581 * wcstol.c - Contains C runtimes wcstol and wcstoul | |
2582 * | |
2583 * Copyright (c) Microsoft Corporation. All rights reserved. | |
2584 * | |
2585 * Purpose: | |
2586 * Convert an ascii string to a long 32-bit value. The base | |
2587 * used for the calculations is supplied by the caller. The base | |
2588 * must be in the range 0, 2-36. If a base of 0 is supplied, the | |
2589 * ascii string must be examined to determine the base of the | |
2590 * number: | |
2591 * (a) First char = '0', second char = 'x' or 'X', | |
2592 * use base 16. | |
2593 * (b) First char = '0', use base 8 | |
2594 * (c) First char in range '1' - '9', use base 10. | |
2595 * | |
2596 * If the 'endptr' value is non-NULL, then wcstol/wcstoul places | |
2597 * a pointer to the terminating character in this value. | |
2598 * See ANSI standard for details | |
2599 * | |
2600 *Entry: | |
2601 * nptr == NEAR/FAR pointer to the start of string. | |
2602 * endptr == NEAR/FAR pointer to the end of the string. | |
2603 * ibase == integer base to use for the calculations. | |
2604 * | |
2605 * string format: [whitespace] [sign] [0] [x] [digits/letters] | |
2606 * | |
2607 *Exit: | |
2608 * Good return: | |
2609 * result | |
2610 * | |
2611 * Overflow return: | |
2612 * wcstol -- LONG_MAX or LONG_MIN | |
2613 * wcstoul -- ULONG_MAX | |
2614 * wcstol/wcstoul -- errno == ERANGE | |
2615 * | |
2616 * No digits or bad base return: | |
2617 * 0 | |
2618 * endptr = nptr* | |
2619 * | |
2620 *Exceptions: | |
2621 * None. | |
2622 * | |
2623 *******************************************************************************/ | |
2624 | |
2625 // flag values */ | |
2626 #define kFlUnsigned (1) // wcstoul called */ | |
2627 #define kFlNeg (2) // negative sign found */ | |
2628 #define kFlOverflow (4) // overflow occured */ | |
2629 #define kFlReaddigit (8) // we've read at least one correct digit */ | |
2630 | |
2631 static unsigned long __cdecl wcstoxl (const wchar_t *nptr, wchar_t **endptr, int
ibase, int flags) { | |
2632 ASSERT(nptr, (L"")); | |
2633 | |
2634 const wchar_t *p; | |
2635 wchar_t c; | |
2636 unsigned long number; | |
2637 unsigned digval; | |
2638 unsigned long maxval; | |
2639 // #ifdef _MT | |
2640 // pthreadlocinfo ptloci = _getptd()->ptlocinfo; | |
2641 | |
2642 // if ( ptloci != __ptlocinfo ) | |
2643 // ptloci = __updatetlocinfo(); | |
2644 // #endif // _MT */ | |
2645 | |
2646 p = nptr; // p is our scanning pointer */ | |
2647 number = 0; // start with zero */ | |
2648 | |
2649 c = *p++; // read char */ | |
2650 | |
2651 // #ifdef _MT | |
2652 // while ( __iswspace_mt(ptloci, c) ) | |
2653 // #else // _MT */ | |
2654 while (c == ' ') | |
2655 // while ( iswspace(c) ) | |
2656 // #endif // _MT */ | |
2657 c = *p++; // skip whitespace */ | |
2658 | |
2659 if (c == '-') { | |
2660 flags |= kFlNeg; // remember minus sign */ | |
2661 c = *p++; | |
2662 } | |
2663 else if (c == '+') | |
2664 c = *p++; // skip sign */ | |
2665 | |
2666 if (ibase < 0 || ibase == 1 || ibase > 36) { | |
2667 // bad base! */ | |
2668 if (endptr) | |
2669 // store beginning of string in endptr */ | |
2670 *endptr = const_cast<wchar_t *>(nptr); | |
2671 return 0L; // return 0 */ | |
2672 } | |
2673 else if (ibase == 0) { | |
2674 // determine base free-lance, based on first two chars of | |
2675 // string */ | |
2676 if (String_CharToDigit(c) != 0) | |
2677 ibase = 10; | |
2678 else if (*p == L'x' || *p == L'X') | |
2679 ibase = 16; | |
2680 else | |
2681 ibase = 8; | |
2682 } | |
2683 | |
2684 if (ibase == 16) { | |
2685 // we might have 0x in front of number; remove if there */ | |
2686 if (String_CharToDigit(c) == 0 && (*p == L'x' || *p == L'X')) { | |
2687 ++p; | |
2688 c = *p++; // advance past prefix */ | |
2689 } | |
2690 } | |
2691 | |
2692 // if our number exceeds this, we will overflow on multiply */ | |
2693 maxval = ULONG_MAX / ibase; | |
2694 | |
2695 for (;;) { // exit in middle of loop */ | |
2696 | |
2697 // convert c to value */ | |
2698 if ( (digval = String_CharToDigit(c)) != (unsigned) -1 ) | |
2699 ; | |
2700 else if (c >= 'A' && c <= 'F') { digval = c - 'A' + 10; } | |
2701 else if (c >= 'a' && c <= 'f') { digval = c - 'a' + 10; } | |
2702 // else if ( __ascii_iswalpha(c)) | |
2703 // digval = __ascii_towupper(c) - L'A' + 10; | |
2704 else | |
2705 break; | |
2706 | |
2707 if (digval >= (unsigned)ibase) | |
2708 break; // exit loop if bad digit found */ | |
2709 | |
2710 // record the fact we have read one digit */ | |
2711 flags |= kFlReaddigit; | |
2712 | |
2713 // we now need to compute number = number * base + digval, | |
2714 // but we need to know if overflow occured. This requires | |
2715 // a tricky pre-check. */ | |
2716 | |
2717 if (number < maxval || (number == maxval && | |
2718 (unsigned long)digval <= ULONG_MAX % ibase)) { | |
2719 // we won't overflow, go ahead and multiply */ | |
2720 number = number * ibase + digval; | |
2721 } | |
2722 else { | |
2723 // we would have overflowed -- set the overflow flag */ | |
2724 flags |= kFlOverflow; | |
2725 } | |
2726 | |
2727 c = *p++; // read next digit */ | |
2728 } | |
2729 | |
2730 --p; // point to place that stopped scan */ | |
2731 | |
2732 if (!(flags & kFlReaddigit)) { | |
2733 // no number there; return 0 and point to beginning of string */ | |
2734 if (endptr) | |
2735 // store beginning of string in endptr later on */ | |
2736 p = nptr; | |
2737 number = 0L; // return 0 */ | |
2738 } | |
2739 // lint -save -e648 -e650 Overflow in -LONG_MIN | |
2740 #pragma warning(push) | |
2741 // C4287 : unsigned/negative constant mismatch. | |
2742 // The offending expression is number > -LONG_MIN. -LONG_MIN overflows and | |
2743 // technically -LONG_MIN == LONG_MIN == 0x80000000. It should actually | |
2744 // result in a compiler warning, such as C4307: integral constant overflow. | |
2745 // Anyway, in the expression (number > -LONG_MIN) the right operand is converted | |
2746 // to unsigned long, so the expression is actually evaluated as | |
2747 // number > 0x80000000UL. The code is probably correct but subtle, to say the | |
2748 // least. | |
2749 #pragma warning(disable : 4287) | |
2750 else if ( (flags & kFlOverflow) || | |
2751 ( !(flags & kFlUnsigned) && | |
2752 ( ( (flags & kFlNeg) && (number > -LONG_MIN) ) || | |
2753 ( !(flags & kFlNeg) && (number > LONG_MAX) ) ) ) ) | |
2754 { | |
2755 // overflow or signed overflow occurred */ | |
2756 // errno = ERANGE; | |
2757 if ( flags & kFlUnsigned ) | |
2758 number = ULONG_MAX; | |
2759 else if ( flags & kFlNeg ) | |
2760 // lint -e{648, 650} Overflow in -LONG_MIN | |
2761 number = (unsigned long)(-LONG_MIN); | |
2762 else | |
2763 number = LONG_MAX; | |
2764 } | |
2765 #pragma warning(pop) | |
2766 // lint -restore | |
2767 | |
2768 if (endptr != NULL) | |
2769 // store pointer to char that stopped the scan */ | |
2770 *endptr = const_cast<wchar_t *>(p); | |
2771 | |
2772 if (flags & kFlNeg) | |
2773 // negate result if there was a neg sign */ | |
2774 number = (unsigned long)(-(long)number); | |
2775 | |
2776 return number; // done. */ | |
2777 } | |
2778 | |
2779 long __cdecl Wcstol (const wchar_t *nptr, wchar_t **endptr, int ibase) { | |
2780 ASSERT(endptr, (L"")); | |
2781 ASSERT(nptr, (L"")); | |
2782 | |
2783 return (long) wcstoxl(nptr, endptr, ibase, 0); | |
2784 } | |
2785 | |
2786 unsigned long __cdecl Wcstoul (const wchar_t *nptr, wchar_t **endptr, int ibase)
{ | |
2787 // endptr may be NULL | |
2788 ASSERT(nptr, (L"")); | |
2789 | |
2790 return wcstoxl(nptr, endptr, ibase, kFlUnsigned); | |
2791 } | |
2792 | |
2793 // Functions on arrays of strings | |
2794 | |
2795 // Returns true iff s is in the array strings (case-insensitive compare) | |
2796 bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s) { | |
2797 ASSERT(s, (L"")); | |
2798 // strings may be NULL | |
2799 | |
2800 const int s_length = lstrlen(s); | |
2801 if (strings == NULL) | |
2802 return false; | |
2803 for (; *strings != NULL; strings++) { | |
2804 if (0 == String_StrNCmp(*strings, s, s_length, true)) { | |
2805 return true; // Found equal string | |
2806 } | |
2807 } | |
2808 return false; | |
2809 } | |
2810 | |
2811 // Returns index of s in the array of strings (or -1 for missing) (case-insensit
ive compare) | |
2812 int String_IndexOf(const TCHAR* const* strings, const TCHAR* s) { | |
2813 ASSERT(s, (L"")); | |
2814 // strings may be NULL | |
2815 | |
2816 const int s_length = lstrlen(s); | |
2817 if (strings == NULL) | |
2818 return -1; | |
2819 for (int i = 0; *strings != NULL; i++, strings++) { | |
2820 if (0 == String_StrNCmp(*strings, s, s_length, true)) { | |
2821 return i; // Found equal string | |
2822 } | |
2823 } | |
2824 return -1; | |
2825 } | |
2826 | |
2827 // The internal format is a int64. | |
2828 time64 StringToTime(const CString & time) { | |
2829 return static_cast<time64>(String_StringToInt64(time)); | |
2830 } | |
2831 | |
2832 // See above comment from StringToTime. | |
2833 // Just show it as a INT64 for now | |
2834 // NOTE: this will truncating it to INT64, which may lop off some times in the f
uture | |
2835 CString TimeToString(const time64 & time) { | |
2836 return String_Int64ToString(static_cast<int64>(time), 10); | |
2837 } | |
2838 | |
2839 const TCHAR *FindStringASpaceStringB (const TCHAR *s, const TCHAR *a, const TCHA
R *b) { | |
2840 ASSERT(s, (L"")); | |
2841 ASSERT(a, (L"")); | |
2842 ASSERT(b, (L"")); | |
2843 | |
2844 const TCHAR *search_from = s; | |
2845 const TCHAR *pos; | |
2846 while (*search_from && (pos = stristrW (search_from, a)) != NULL) { | |
2847 const TCHAR *start = pos; | |
2848 pos += lstrlen(a); | |
2849 search_from = pos; | |
2850 while (*pos == ' ' || *pos == '\t') pos++; | |
2851 if (!String_StrNCmp (pos, b, lstrlen(b), true)) return start; | |
2852 } | |
2853 | |
2854 return 0; | |
2855 } | |
2856 | |
// Returns true iff c is one of the ASCII letters 'a'-'z' or 'A'-'Z'.
bool IsAlphaA(const char c) {
  const bool is_lower = ('a' <= c && c <= 'z');
  const bool is_upper = ('A' <= c && c <= 'Z');
  return is_lower || is_upper;
}
2860 | |
// Returns true iff c is one of the ASCII digits '0'-'9'.
bool IsDigitA(const char c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
2864 | |
2865 void SafeStrCat (TCHAR *dest, const TCHAR *src, int dest_buffer_len) { | |
2866 _tcscat_s(dest, dest_buffer_len, src); | |
2867 } | |
2868 | |
2869 // extracts next float in a string | |
2870 // skips any non-digit characters | |
2871 // return position after end of float | |
2872 const TCHAR *ExtractNextDouble (const TCHAR *s, double *f) { | |
2873 ASSERT (f, (L"")); | |
2874 ASSERT (s, (L"")); | |
2875 | |
2876 CString num; | |
2877 while (*s && !String_IsDigit (*s)) s++; | |
2878 while (*s && (*s == '.' || String_IsDigit (*s))) { num += *s; s++; } | |
2879 ASSERT (num.GetLength(), (L"")); | |
2880 *f = String_StringToDouble (num); | |
2881 return s; | |
2882 } | |
2883 | |
2884 TCHAR *String_PathFindExtension(const TCHAR *path) { | |
2885 ASSERT(path, (L"")); | |
2886 | |
2887 // Documentation says PathFindExtension string must be of max length | |
2888 // MAX_PATH but a trusted tester hit the ASSERT and we don't really | |
2889 // need it here, so commented out. We can't address where it is | |
2890 // called because it's called from ATL code. | |
2891 // ASSERT(lstrlen(path)<=MAX_PATH, (L"")); | |
2892 | |
2893 // point to terminating NULL | |
2894 const TCHAR *ret = path + lstrlen(path); | |
2895 const TCHAR *pos = ret; | |
2896 | |
2897 while (--pos >= path) { | |
2898 if (*pos == '.') | |
2899 return const_cast<TCHAR *>(pos); | |
2900 } | |
2901 | |
2902 return const_cast<TCHAR *>(ret); | |
2903 } | |
2904 | |
// Maps the ASCII letters 'A'-'Z' to 'a'-'z'; every other value is returned
// unchanged.
char String_ToLowerCharAnsi(char c) {
  const bool is_upper = ('A' <= c && c <= 'Z');
  return is_upper ? static_cast<char>(c + ('a' - 'A')) : c;
}
2909 | |
2910 int String_ToLowerChar(int c) { | |
2911 // If it's < 128, then convert is ourself, which is far cheaper than the syste
m conversion | |
2912 if (c < 128) | |
2913 return String_ToLowerCharAnsi(static_cast<char>(c)); | |
2914 | |
2915 return Char_ToLower(static_cast<TCHAR>(c)); | |
2916 } | |
2917 | |
2918 | |
2919 bool String_PathRemoveFileSpec(TCHAR *path) { | |
2920 ASSERT (path, (L"")); | |
2921 | |
2922 int len, pos; | |
2923 len = pos = lstrlen (path); | |
2924 | |
2925 // You might think that the SHLWAPI API does not change "c:\windows" -> "c:\" | |
2926 // when c:\windows is a directory, but it does. | |
2927 | |
2928 // If we don't want to match this weird API we can use the following to check | |
2929 // for directories: | |
2930 | |
2931 // Check if we are already a directory. | |
2932 WIN32_FILE_ATTRIBUTE_DATA attrs; | |
2933 // Failure (if file does not exist) is OK. | |
2934 BOOL success = GetFileAttributesEx(path, GetFileExInfoStandard, &attrs); | |
2935 UTIL_LOG(L4, (_T("[String_PathRemoveFileSpec][path %s][success %d][dir %d]"), | |
2936 path, | |
2937 success, | |
2938 attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)); | |
2939 if (success && (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { | |
2940 // Remove trailing backslash, if any. | |
2941 if (path[pos-1] == '\\') | |
2942 path[pos-1] = '\0'; | |
2943 return 1; | |
2944 } | |
2945 | |
2946 // Find last backslash. | |
2947 while (pos && path[pos] != '\\') pos--; | |
2948 if (!pos && path[pos] != '\\') return 0; | |
2949 | |
2950 ASSERT (pos < len, (L"")); | |
2951 | |
2952 // The documentation says it removes backslash but it doesn't for c:\. | |
2953 if (!pos || path[pos-1] == ':' || (pos == 1 && path[0] == '\\')) | |
2954 // Keep the backslash in this case. | |
2955 path[pos+1] = '\0'; | |
2956 else | |
2957 path[pos] = '\0'; | |
2958 | |
2959 return 1; | |
2960 } | |
2961 | |
2962 void String_EndWithChar(TCHAR *str, TCHAR c) { | |
2963 ASSERT (str, (L"")); | |
2964 int len = lstrlen(str); | |
2965 if (len == 0 || str[len - 1] != c) { | |
2966 str[len] = c; | |
2967 str[len + 1] = 0; | |
2968 } | |
2969 } | |
2970 | |
2971 bool StartsWithBOM(const TCHAR* string) { | |
2972 ASSERT(string, (L"")); | |
2973 wchar_t c = string[0]; | |
2974 if (c == 0xFFFE || c == 0xFEFF) | |
2975 return true; | |
2976 else | |
2977 return false; | |
2978 } | |
2979 | |
2980 const TCHAR* StringAfterBOM(const TCHAR* string) { | |
2981 ASSERT(string, (L"")); | |
2982 return &string[StartsWithBOM(string) ? 1 : 0]; | |
2983 } | |
2984 | |
2985 bool String_StringToDecimalIntChecked(const TCHAR* str, int* value) { | |
2986 ASSERT1(str); | |
2987 ASSERT1(value); | |
2988 | |
2989 if (_set_errno(0)) { | |
2990 return false; | |
2991 } | |
2992 | |
2993 TCHAR* end_ptr = NULL; | |
2994 *value = _tcstol(str, &end_ptr, 10); | |
2995 ASSERT1(end_ptr); | |
2996 | |
2997 if (errno) { | |
2998 ASSERT1(ERANGE == errno); | |
2999 // Overflow or underflow. | |
3000 return false; | |
3001 } else if (*value == 0) { | |
3002 // The value returned could be an error code. tcsltol returns | |
3003 // zero when it cannot convert the string. However we need to | |
3004 // distinguish a real zero. Thus check to see if end_ptr is not the start | |
3005 // of the string (str is not an empty string) and is pointing to a '\0'. | |
3006 // If not, we have an error. | |
3007 if ((str == end_ptr) || (*end_ptr != '\0')) { | |
3008 return false; | |
3009 } | |
3010 } else if (*end_ptr != '\0') { | |
3011 // The end_ptr is pointing at a character that is | |
3012 // not the end of the string. Only part of the string could be converted. | |
3013 return false; | |
3014 } | |
3015 | |
3016 return true; | |
3017 } | |
3018 | |
3019 bool CLSIDToCString(const GUID& guid, CString* str) { | |
3020 ASSERT(str, (L"")); | |
3021 | |
3022 LPOLESTR string_guid = NULL; | |
3023 if (::StringFromCLSID(guid, &string_guid) != S_OK) { | |
3024 return false; | |
3025 } | |
3026 *str = string_guid; | |
3027 ::CoTaskMemFree(string_guid); | |
3028 | |
3029 return true; | |
3030 } | |
3031 | |
3032 HRESULT String_StringToBool(const TCHAR* str, bool* value) { | |
3033 ASSERT1(str); | |
3034 ASSERT1(value); | |
3035 | |
3036 // This method now performs a case-insentitive | |
3037 // culture aware compare. We should however be ok as we are only comparing | |
3038 // latin characters. | |
3039 if (_tcsicmp(kFalse, str) == 0) { | |
3040 *value = false; | |
3041 } else if (_tcsicmp(kTrue, str) == 0) { | |
3042 *value = true; | |
3043 } else { | |
3044 // we found another string. should error out. | |
3045 return E_FAIL; | |
3046 } | |
3047 return S_OK; | |
3048 } | |
3049 | |
3050 HRESULT String_BoolToString(bool value, CString* string) { | |
3051 ASSERT1(string); | |
3052 *string = value ? kTrue : kFalse; | |
3053 return S_OK; | |
3054 } | |
3055 | |
3056 CString String_ReplaceIgnoreCase(const CString& string, | |
3057 const CString& token, | |
3058 const CString& replacement) { | |
3059 int token_length = token.GetLength(); | |
3060 if (!token_length) { | |
3061 return string; | |
3062 } | |
3063 | |
3064 CString string_lowercase(string); | |
3065 CString token_lowercase(token); | |
3066 string_lowercase.MakeLower(); | |
3067 token_lowercase.MakeLower(); | |
3068 | |
3069 CString output(string); | |
3070 int replacement_length = replacement.GetLength(); | |
3071 | |
3072 int index = 0; | |
3073 int output_index = 0; | |
3074 | |
3075 for (int new_index = 0; | |
3076 (new_index = string_lowercase.Find(token_lowercase, index)) != -1; | |
3077 index = new_index + token_length) { | |
3078 output_index += new_index - index; | |
3079 output.Delete(output_index, token_length); | |
3080 output.Insert(output_index, replacement); | |
3081 output_index += replacement_length; | |
3082 } | |
3083 | |
3084 return output; | |
3085 } | |
3086 | |
3087 // Escape and unescape strings (shlwapi-based implementation). | |
3088 // The intended usage for these APIs is escaping strings to make up | |
3089 // URLs, for example building query strings. | |
3090 // | |
3091 // Pass false to the flag segment_only to escape the url. This will not | |
3092 // cause the conversion of the # (%23), ? (%3F), and / (%2F) characters. | |
3093 | |
3094 // Characters that must be encoded include any characters that have no | |
3095 // corresponding graphic character in the US-ASCII coded character | |
3096 // set (hexadecimal 80-FF, which are not used in the US-ASCII coded character | |
3097 // set, and hexadecimal 00-1F and 7F, which are control characters), | |
3098 // blank spaces, "%" (which is used to encode other characters), | |
3099 // and unsafe characters (<, >, ", #, {, }, |, \, ^, ~, [, ], and '). | |
3100 // | |
3101 // The input and output strings can't be longer than INTERNET_MAX_URL_LENGTH | |
3102 | |
3103 HRESULT StringEscape(const CString& str_in, | |
3104 bool segment_only, | |
3105 CString* str_out) { | |
3106 ASSERT1(str_out); | |
3107 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH); | |
3108 | |
3109 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1; | |
3110 HRESULT hr = ::UrlEscape(str_in, str_out->GetBufferSetLength(buf_len), &buf_le
n, | |
3111 segment_only ? URL_ESCAPE_PERCENT | URL_ESCAPE_SEGMENT_ONLY : URL_ESCAPE_PER
CENT); | |
3112 if (SUCCEEDED(hr)) { | |
3113 str_out->ReleaseBuffer(); | |
3114 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
3115 } | |
3116 return hr; | |
3117 } | |
3118 | |
3119 HRESULT StringUnescape(const CString& str_in, CString* str_out) { | |
3120 ASSERT1(str_out); | |
3121 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH); | |
3122 | |
3123 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1; | |
3124 HRESULT hr = ::UrlUnescape(const_cast<TCHAR*>(str_in.GetString()), | |
3125 str_out->GetBufferSetLength(buf_len), &buf_len, 0); | |
3126 if (SUCCEEDED(hr)) { | |
3127 str_out->ReleaseBuffer(buf_len + 1); | |
3128 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
3129 } | |
3130 return hr; | |
3131 } | |
3132 | |
3133 bool String_StringToTristate(const TCHAR* str, Tristate* value) { | |
3134 ASSERT1(str); | |
3135 ASSERT1(value); | |
3136 | |
3137 int numerical_value = 0; | |
3138 if (!String_StringToDecimalIntChecked(str, &numerical_value)) { | |
3139 return false; | |
3140 } | |
3141 | |
3142 switch (numerical_value) { | |
3143 case 0: | |
3144 *value = TRISTATE_FALSE; | |
3145 break; | |
3146 case 1: | |
3147 *value = TRISTATE_TRUE; | |
3148 break; | |
3149 case 2: | |
3150 *value = TRISTATE_NONE; | |
3151 break; | |
3152 default: | |
3153 return false; | |
3154 } | |
3155 | |
3156 return true; | |
3157 } | |
3158 | |
3159 // Extracts the name and value from a string that contains a name/value pair. | |
3160 bool ParseNameValuePair(const CString& token, | |
3161 TCHAR separator, | |
3162 CString* name, | |
3163 CString* value) { | |
3164 ASSERT1(name); | |
3165 ASSERT1(value); | |
3166 | |
3167 int separator_index = token.Find(separator); | |
3168 if ((separator_index == -1) || // Not a name-value pair. | |
3169 (separator_index == 0) || // No name was supplied. | |
3170 (separator_index == (token.GetLength() - 1))) { // No value was supplied. | |
3171 return false; | |
3172 } | |
3173 | |
3174 *name = token.Left(separator_index); | |
3175 *value = token.Right(token.GetLength() - separator_index - 1); | |
3176 | |
3177 ASSERT1(token.GetLength() == name->GetLength() + value->GetLength() + 1); | |
3178 | |
3179 // It's not possible for the name to contain the separator. | |
3180 ASSERT1(-1 == name->Find(separator)); | |
3181 if (-1 != value->Find(separator)) { | |
3182 // The value contains the separator. | |
3183 return false; | |
3184 } | |
3185 | |
3186 return true; | |
3187 } | |
3188 | |
3189 bool SplitCommandLineInPlace(TCHAR *command_line, | |
3190 TCHAR **first_argument_parameter, | |
3191 TCHAR **remaining_arguments_parameter) { | |
3192 if (!command_line || | |
3193 !first_argument_parameter || | |
3194 !remaining_arguments_parameter) { | |
3195 return false; | |
3196 } | |
3197 | |
3198 TCHAR end_char; | |
3199 TCHAR *&first_argument = *first_argument_parameter; | |
3200 TCHAR *&remaining_arguments = *remaining_arguments_parameter; | |
3201 if (_T('\"') == *command_line) { | |
3202 end_char = _T('\"'); | |
3203 first_argument = remaining_arguments = command_line + 1; | |
3204 } else { | |
3205 end_char = _T(' '); | |
3206 first_argument = remaining_arguments = command_line; | |
3207 } | |
3208 // Search for the end of the first argument | |
3209 while (end_char != *remaining_arguments && '\0' != *remaining_arguments) { | |
3210 ++remaining_arguments; | |
3211 } | |
3212 if (end_char == *remaining_arguments) { | |
3213 *remaining_arguments = '\0'; | |
3214 do { | |
3215 // Skip the spaces between the first argument and the remaining arguments. | |
3216 ++remaining_arguments; | |
3217 } while (_T(' ') == *remaining_arguments); | |
3218 } | |
3219 return true; | |
3220 } | |
3221 | |
3222 bool ContainsOnlyAsciiChars(const CString& str) { | |
3223 for (int i = 0; i < str.GetLength(); ++i) { | |
3224 if (str[i] > 0x7F) { | |
3225 return false; | |
3226 } | |
3227 } | |
3228 return true; | |
3229 } | |
3230 CString BytesToHex(const uint8* bytes, size_t num_bytes) { | |
3231 CString result; | |
3232 if (bytes) { | |
3233 result.Preallocate(num_bytes * sizeof(TCHAR)); | |
3234 static const TCHAR* const kHexChars = _T("0123456789abcdef"); | |
3235 for (size_t i = 0; i != num_bytes; ++i) { | |
3236 result.AppendChar(kHexChars[(bytes[i] >> 4)]); | |
3237 result.AppendChar(kHexChars[(bytes[i] & 0xf)]); | |
3238 } | |
3239 } | |
3240 return result; | |
3241 } | |
3242 | |
3243 CString BytesToHex(const std::vector<uint8>& bytes) { | |
3244 CString result; | |
3245 if (!bytes.empty()) { | |
3246 result.SetString(BytesToHex(&bytes.front(), bytes.size())); | |
3247 } | |
3248 return result; | |
3249 } | |
3250 | |
3251 void JoinStrings(const std::vector<CString>& components, | |
3252 const TCHAR* delim, | |
3253 CString* result) { | |
3254 ASSERT1(result); | |
3255 result->Empty(); | |
3256 | |
3257 // Compute length so we can reserve memory. | |
3258 size_t length = 0; | |
3259 size_t delim_length = delim ? _tcslen(delim) : 0; | |
3260 for (size_t i = 0; i != components.size(); ++i) { | |
3261 if (i != 0) { | |
3262 length += delim_length; | |
3263 } | |
3264 length += components[i].GetLength(); | |
3265 } | |
3266 | |
3267 result->Preallocate(length); | |
3268 | |
3269 for (size_t i = 0; i != components.size(); ++i) { | |
3270 if (i != 0 && delim) { | |
3271 result->Append(delim, delim_length); | |
3272 } | |
3273 result->Append(components[i]); | |
3274 } | |
3275 } | |
3276 | |
3277 void JoinStringsInArray(const TCHAR* components[], | |
3278 int num_components, | |
3279 const TCHAR* delim, | |
3280 CString* result) { | |
3281 ASSERT1(result); | |
3282 result->Empty(); | |
3283 | |
3284 for (int i = 0; i != num_components; ++i) { | |
3285 if (i != 0 && delim) { | |
3286 result->Append(delim); | |
3287 } | |
3288 if (components[i]) { | |
3289 result->Append(components[i]); | |
3290 } | |
3291 } | |
3292 } | |
3293 | |
3294 CString FormatResourceMessage(uint32 resource_id, ...) { | |
3295 CString format; | |
3296 const bool is_loaded = !!format.LoadString(resource_id); | |
3297 | |
3298 if (!is_loaded) { | |
3299 return CString(); | |
3300 } | |
3301 | |
3302 va_list arg_list; | |
3303 va_start(arg_list, resource_id); | |
3304 | |
3305 CString formatted; | |
3306 formatted.FormatMessageV(format, &arg_list); | |
3307 | |
3308 va_end(arg_list); | |
3309 | |
3310 return formatted; | |
3311 } | |
3312 | |
3313 CString FormatErrorCode(DWORD error_code) { | |
3314 CString error_code_string; | |
3315 if (FAILED(error_code)) { | |
3316 error_code_string.Format(_T("0x%08x"), error_code); | |
3317 } else { | |
3318 error_code_string.Format(_T("%u"), error_code); | |
3319 } | |
3320 return error_code_string; | |
3321 } | |
3322 | |
3323 HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out) { | |
3324 ASSERT1(out); | |
3325 | |
3326 out->Empty(); | |
3327 if (str.IsEmpty()) { | |
3328 return S_OK; | |
3329 } | |
3330 | |
3331 // Utf8 encode the Utf16 string first. Next urlencode it. | |
3332 CStringA utf8str = WideToUtf8(str); | |
3333 ASSERT1(!utf8str.IsEmpty()); | |
3334 DWORD buf_len = INTERNET_MAX_URL_LENGTH; | |
3335 CStringA escaped_utf8_name; | |
3336 HRESULT hr = ::UrlEscapeA(utf8str, | |
3337 CStrBufA(escaped_utf8_name, buf_len), | |
3338 &buf_len, | |
3339 0); | |
3340 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
3341 ASSERT1(escaped_utf8_name.GetLength() == static_cast<int>(buf_len)); | |
3342 if (FAILED(hr)) { | |
3343 UTIL_LOG(LE, (_T("[UrlEscapeA failed][0x%08x]"), hr)); | |
3344 return hr; | |
3345 } | |
3346 | |
3347 *out = CString(escaped_utf8_name); | |
3348 return S_OK; | |
3349 } | |
3350 | |
3351 HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out) { | |
3352 ASSERT1(out); | |
3353 | |
3354 out->Empty(); | |
3355 if (str.IsEmpty()) { | |
3356 return S_OK; | |
3357 } | |
3358 | |
3359 // The value is a utf8 encoded url escaped string that is stored as a | |
3360 // unicode string. Because of this, it should contain only ascii chars. | |
3361 if (!ContainsOnlyAsciiChars(str)) { | |
3362 UTIL_LOG(LE, (_T("[String contains non ascii chars]"))); | |
3363 return E_INVALIDARG; | |
3364 } | |
3365 | |
3366 CStringA escaped_utf8_val = WideToAnsiDirect(str); | |
3367 DWORD buf_len = INTERNET_MAX_URL_LENGTH; | |
3368 CStringA unescaped_val; | |
3369 HRESULT hr = ::UrlUnescapeA(const_cast<char*>(escaped_utf8_val.GetString()), | |
3370 CStrBufA(unescaped_val, buf_len), | |
3371 &buf_len, | |
3372 0); | |
3373 ASSERT1(unescaped_val.GetLength() == static_cast<int>(buf_len)); | |
3374 if (FAILED(hr)) { | |
3375 UTIL_LOG(LE, (_T("[UrlUnescapeA failed][0x%08x]"), hr)); | |
3376 return hr; | |
3377 } | |
3378 ASSERT1(buf_len == static_cast<DWORD>(unescaped_val.GetLength())); | |
3379 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH); | |
3380 CString app_name = Utf8ToWideChar(unescaped_val, | |
3381 unescaped_val.GetLength()); | |
3382 if (app_name.IsEmpty()) { | |
3383 return E_INVALIDARG; | |
3384 } | |
3385 | |
3386 *out = app_name; | |
3387 return S_OK; | |
3388 } | |
3389 | |
3390 } // namespace omaha | |
3391 | |
OLD | NEW |