base/string.cc - Issue 624713003: Keep only base/extractor.[cc|h].

Side by Side Diff: base/string.cc

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2003-2009 Google Inc.

2 //

3 // Licensed under the Apache License, Version 2.0 (the "License");

4 // you may not use this file except in compliance with the License.

5 // You may obtain a copy of the License at

6 //

7 // http://www.apache.org/licenses/LICENSE-2.0

8 //

9 // Unless required by applicable law or agreed to in writing, software

10 // distributed under the License is distributed on an "AS IS" BASIS,

11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12 // See the License for the specific language governing permissions and

13 // limitations under the License.

14 // ========================================================================

15

16 #include "omaha/base/string.h"

17

18 #include <wininet.h> // For INTERNET_MAX_URL_LENGTH.

19 #include <algorithm>

20 #include <cstdlib>

21 #include "base/scoped_ptr.h"

22 #include "omaha/base/commontypes.h"

23 #include "omaha/base/debug.h"

24 #include "omaha/base/localization.h"

25 #include "omaha/base/logging.h"

26

27 namespace omaha {

28

29 namespace {

30 // Testing shows that only the following ASCII characters are

31 // considered spaces by GetStringTypeA: 9-13, 32, 160.

32 // Rather than call GetStringTypeA with no locale, as we used to,

33 // we look up the values directly in a precomputed array.

34

35 SELECTANY byte spaces[256] = {

36 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0-9

37 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 10-19

38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-29

39 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // 30-39

40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-49

41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-59

42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-69

43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-79

44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80-89

45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90-99

46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 100-109

47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 110-119

48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 120-129

49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 130-139

50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 140-149

51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 150-159

52 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160-169

53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 170-179

54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 180-189

55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 190-199

56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 200-209

57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 210-219

58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 220-229

59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 230-239

60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 240-249

61 0, 0, 0, 0, 0, 1, // 250-255

62 };

63 } // namespace

64

65 const TCHAR* const kFalse = _T("false");

66 const TCHAR* const kTrue = _T("true");

67

68 bool IsSpaceW(WCHAR c) {

69 // GetStringTypeW considers these characters to be spaces:

70 // 9-13, 32, 133, 160, 5760, 8192-8203, 8232, 8233, 12288

71 if (c < 256)

72 return (c == 133 \|\| IsSpaceA((char) (c & 0xff)));

73

74 return (c >= 8192 && c <= 8203) \|\| c == 8232 \|\|

75 c == 8233 \|\| c == 12288;

76 }

77

78 bool IsSpaceA(char c) {

79 return spaces[static_cast<unsigned char>(c)] == 1;

80 }

81

82 int TrimCString(CString &s) {

83 int len = Trim(s.GetBuffer());

84 s.ReleaseBufferSetLength(len);

85 return len;

86 }

87

88 void MakeLowerCString(CString & s) {

89 int len = s.GetLength();

90 String_FastToLower(s.GetBuffer());

91 s.ReleaseBufferSetLength(len);

92 }

93

94 int Trim(TCHAR *s) {

95 ASSERT(s, (L""));

96

97 // First find end of leading spaces

98 TCHAR *start = s;

99 while (*start) {

100 if (!IsSpace(*start))

101 break;

102 ++start;

103 }

104

105 // Now search for the end, remembering the start of the last spaces

106 TCHAR *end = start;

107 TCHAR *last_space = end;

108 while (*end) {

109 if (!IsSpace(*end))

110 last_space = end + 1;

111 ++end;

112 }

113

114 // Copy the part we want

115 int len = last_space - start;

116 // lint -e{802} Conceivably passing a NULL pointer

117 memmove(s, start, len * sizeof(TCHAR));

118

119 // 0 terminate

120 s[len] = 0;

121

122 return len;

123 }

124

125 void TrimString(CString& s, const TCHAR* delimiters) {

126 s = s.Trim(delimiters);

127 }

128

129 // Strip the first token from the front of argument s. A token is a

130 // series of consecutive non-blank characters - unless the first

131 // character is a double-quote ("), in that case the token is the full

132 // quoted string

133 CString StripFirstQuotedToken(const CString& s) {

134 const int npos = -1;

135

136 // Make a writeable copy

137 CString str(s);

138

139 // Trim any surrounding blanks (and tabs, for the heck of it)

140 TrimString(str, L" \t");

141

142 // Too short to have a second token

143 if (str.GetLength() <= 1)

144 return L"";

145

146 // What kind of token are we stripping?

147 if (str[0] == L'\"') {

148 // Remove leading quoting string

149 int i = str.Find(L"\"", 1);

150 if (i != npos)

151 i++;

152 return str.Mid(i);

153 } else {

154 // Remove leading token

155 int i = str.FindOneOf(L" \t");

156 if (i != npos)

157 i++;

158 return str.Mid(i);

159 }

160 }

161

162 // A block of text to separate lines, and back

163 void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CStrin g>* lines) {

164 ASSERT(delimiter, (L""));

165 ASSERT(lines, (L""));

166

167 size_t delimiter_len = ::lstrlen(delimiter);

168 int b = 0;

169 int e = 0;

170

171 for (b = 0; e != -1 && b < text.GetLength(); b = e + delimiter_len) {

172 e = text.Find(delimiter, b);

173 if (e != -1) {

174 ASSERT1(e - b > 0);

175 lines->push_back(text.Mid(b, e - b));

176 } else {

177 lines->push_back(text.Mid(b));

178 }

179 }

180 }

181

182 void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CStr ing* text) {

183 ASSERT(delimiter, (L""));

184 ASSERT(text, (L""));

185

186 size_t delimiter_len = ::lstrlen(delimiter);

187 size_t len = 0;

188 for (size_t i = 0; i < lines.size(); ++i) {

189 len += lines[i].GetLength() + delimiter_len;

190 }

191 text->Empty();

192 text->Preallocate(len);

193 for (std::vector<CString>::size_type i = 0; i < lines.size(); ++i) {

194 text->Append(lines[i]);

195 if (delimiter_len) {

196 text->Append(delimiter);

197 }

198 }

199 }

200

201 int CleanupWhitespaceCString(CString &s) {

202 int len = CleanupWhitespace(s.GetBuffer());

203 s.ReleaseBufferSetLength(len);

204 return len;

205 }

206

207 int CleanupWhitespace(TCHAR *str) {

208 ASSERT(str, (L""));

209

210 TCHAR *src = str;

211 TCHAR *dest = str;

212 int spaces = 0;

213 bool at_start = true;

214 while (true) {

215 // At end of string?

216 TCHAR c = *src;

217 if (0 == c)

218 break;

219

220 // Look for whitespace; copy it over if not whitespace

221 if (IsSpace(c)) {

222 ++spaces;

223 }

224 else {

225 *dest++ = c;

226 at_start = false;

227 spaces = 0;

228 }

229

230 // Write only first consecutive space (but skip space at start)

231 if (1 == spaces && !at_start)

232 *dest++ = ' ';

233

234 ++src;

235 }

236

237 // Remove trailing space, if any

238 if (dest > str && *(dest - 1) == L' ')

239 --dest;

240

241 // 0-terminate

242 *dest = 0;

243

244 return dest - str;

245 }

246

247 // Take 1 single hexadecimal "digit" (as a character) and return its decimal val ue

248 // Returns -1 if given invalid hex digit

249 int HexDigitToDec(const TCHAR digit) {

250 if (digit >= L'A' && digit <= L'F')

251 return 10 + (digit - L'A');

252 else if (digit >= L'a' && digit <= L'f')

253 return 10 + (digit - L'a');

254 else if (digit >= L'0' && digit <= L'9')

255 return (digit - L'0');

256 else

257 return -1;

258 }

259

260 // Convert the 2 hex chars at positions <pos> and <pos>+1 in <s> to a char (<cha r_out>)

261 // Note: scanf was giving me troubles, so here's the manual version

262 // Extracted char gets written to <char_out>, which must be allocated by

263 // the caller; return true on success or false if parameters are incorrect

264 // or string does not have 2 hex digits at the specified position

265 // NOTE: <char_out> is NOT a string, just a pointer to a char for the result

266 bool ExtractChar(const CString & s, int pos, unsigned char * char_out) {

267 // char_out may be NULL

268

269 if (s.GetLength() < pos + 1) {

270 return false;

271 }

272

273 if (pos < 0 \|\| NULL == char_out) {

274 ASSERT(0, (_T("invalid params: pos<0 or char_out is NULL")));

275 return false;

276 }

277

278 TCHAR c1 = s.GetAt(pos);

279 TCHAR c2 = s.GetAt(pos+1);

280

281 int p1 = HexDigitToDec(c1);

282 int p2 = HexDigitToDec(c2);

283

284 if (p1 == -1 \|\| p2 == -1) {

285 return false;

286 }

287

288 char_out = (unsigned char)(p1 16 + p2);

289 return true;

290 }

291

292 WCHAR ToWide (const char s, int len) {

293 ASSERT (s, (L""));

294 WCHAR *w = new WCHAR [len+1]; if (!w) { return NULL; }

295 // int rc = MultiByteToWideChar (CP_ACP, 0, s.GetString(), (int)s.GetLength( )+1, w, s.GetLength()+1);

296 // TODO(omaha): why would it ever be the case that rc > len?

297 int rc = MultiByteToWideChar (CP_ACP, 0, s, len, w, len);

298 if (rc > len) { delete [] w; return NULL; }

299 // ASSERT (rc <= len, (L""));

300 w[rc]=L'\0';

301 return w;

302 }

303

304 const byte BufferContains (const byte buf, uint32 buf_len, const byte *data, u int32 data_len) {

305 ASSERT(data, (L""));

306 ASSERT(buf, (L""));

307

308 for (uint32 i = 0; i < buf_len; i++) {

309 uint32 j = i;

310 uint32 k = 0;

311 uint32 len = 0;

312 while (j < buf_len && k < data_len && buf[j++] == data[k++]) { len++; }

313 if (len == data_len) { return buf + i; }

314 }

315 return 0;

316 }

317

318 // Converting the Ansi Multibyte String into unicode string. The multibyte

319 // string is encoded using the specified codepage.

320 // The code is pretty much like the U2W function, except the codepage can be

321 // any valid windows CP.

322 BOOL AnsiToWideString(const char from, int length, UINT codepage, CString to) {

323 ASSERT(from, (L""));

324 ASSERT(to, (L""));

325 ASSERT1(length >= -1);

326 // Figure out how long the string is

327 int req_chars = MultiByteToWideChar(codepage, 0, from, length, NULL, 0);

328

329 if (req_chars <= 0) {

330 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));

331 *to = AnsiToWideString(from, length);

332 return FALSE;

333 }

334

335 TCHAR *buffer = to->GetBufferSetLength(req_chars);

336 int conv_chars = MultiByteToWideChar(codepage, 0, from, length, buffer, req_ch ars);

337 if (conv_chars == 0) {

338 UTIL_LOG(LEVEL_WARNING, (_T("MultiByteToWideChar Failed ")));

339 to->ReleaseBuffer(0);

340 *to = AnsiToWideString(from, length);

341 return FALSE;

342 }

343

344 // Something truly horrible happened.

345 ASSERT (req_chars == conv_chars, (L"MBToWide returned unexpected value: GetLas tError()=%d",GetLastError()));

346 // If length was inferred, conv_chars includes the null terminator.

347 // Adjust the length here to remove null termination,

348 // because we use the length-qualified CString constructor,

349 // which automatically adds null termination given an unterminated array.

350 if (-1 == length) { --conv_chars; }

351 to->ReleaseBuffer(conv_chars);

352 return TRUE;

353 }

354

355 // CStringW(const char* from) did not cast all character properly

356 // so we write our own.

357 CString AnsiToWideString(const char *from, int length) {

358 ASSERT(from, (L""));

359 ASSERT1(length >= -1);

360 if (length < 0)

361 length = strlen(from);

362 CString to;

363 TCHAR *buffer = to.GetBufferSetLength(length);

364 for (int i = 0; i < length; ++i)

365 buffer[i] = static_cast<UINT8>(from[i]);

366 to.ReleaseBuffer(length);

367 return to;

368 }

369

370

371 // Transform a unicode string into UTF8, as represented in an ASCII string

372 CStringA WideToUtf8(const CString& w) {

373 // Add a cutoff. If it's all ascii, convert it directly

374 const TCHAR* input = static_cast<const TCHAR*>(w.GetString());

375 int input_len = w.GetLength(), i;

376 for (i = 0; i < input_len; ++i) {

377 if (input[i] > 127) {

378 break;

379 }

380 }

381

382 // If we made it to the end without breaking, then it's all ANSI, so do a quic k convert

383 if (i == input_len) {

384 return WideToAnsiDirect(w);

385 }

386

387 // Figure out how long the string is

388 int req_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL);

389

390 scoped_array<char> utf8_buffer(new char[req_bytes]);

391

392 int conv_bytes = ::WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8_buffer.get(), r eq_bytes, NULL, NULL);

393 ASSERT1(req_bytes == conv_bytes);

394

395 // conv_bytes includes the null terminator, when we read this in, don't read t he terminator

396 CStringA out(utf8_buffer.get(), conv_bytes - 1);

397

398 return out;

399 }

400

401 CString Utf8ToWideChar(const char* utf8, uint32 num_bytes) {

402 ASSERT1(utf8);

403 if (num_bytes == 0) {

404 return CString();

405 }

406

407 uint32 number_of_wide_chars = ::MultiByteToWideChar(CP_UTF8, 0, utf8, num_byte s, NULL, 0);

408 number_of_wide_chars += 1; // make room for NULL terminator

409

410 CString ret_string;

411 TCHAR* buffer = ret_string.GetBuffer(number_of_wide_chars);

412 DWORD number_of_characters_copied = ::MultiByteToWideChar(CP_UTF8, 0, utf8, nu m_bytes, buffer, number_of_wide_chars);

413 ASSERT1(number_of_characters_copied == number_of_wide_chars - 1);

414 buffer[number_of_wide_chars - 1] = _T('\0'); // ensure there is a NULL termin ator

415 ret_string.ReleaseBuffer();

416

417 // Strip the byte order marker if there is one in the document.

418 if (ret_string[0] == kUnicodeBom) {

419 ret_string = ret_string.Right(ret_string.GetLength() - 1);

420 }

421

422 if (number_of_characters_copied > 0) {

423 return ret_string;

424 }

425

426 // Failure case

427 return CString();

428 }

429

430 CString Utf8BufferToWideChar(const std::vector<uint8>& buffer) {

431 CString result;

432 if (!buffer.empty()) {

433 result = Utf8ToWideChar(

434 reinterpret_cast<const char*>(&buffer.front()), buffer.size());

435 }

436 return result;

437 }

438

439 CString AbbreviateString (const CString & title, int32 max_len) {

440 ASSERT (max_len, (L""));

441 CString s(title);

442 TrimCString(s); // remove whitespace at start/end

443 if (s.GetLength() > max_len) {

444 s = s.Left (max_len - 2);

445 CString orig(s);

446 // remove partial words

447 while (s.GetLength() > 1 && !IsSpace(s[s.GetLength()-1])) { s = s.Left ( s.GetLength() - 1); }

448 // but not if it would make the string very short

449 if (s.GetLength() < max_len / 2) { s = orig; }

450 s += _T("..");

451 }

452

453 return s;

454 }

455

456 CString GetAbsoluteUri(const CString& uri) {

457 int i = String_FindString(uri, _T("://"));

458 if (i==-1) return uri;

459

460 // add trailing / if none exists

461 int j = String_FindChar(uri, L'/',i+3);

462 if (j==-1) return (uri+NOTRANSL(_T("/")));

463

464 // remove duplicate trailing slashes

465 int len = uri.GetLength();

466 if (len > 1 && uri.GetAt(len-1) == '/' && uri.GetAt(len-2) == '/') {

467 CString new_uri(uri);

468 int new_len = new_uri.GetLength();

469 while (new_len > 1 && new_uri.GetAt(new_len-1) == '/' && new_uri.GetAt(new_l en-2) == '/') {

470 new_len--;

471 new_uri = new_uri.Left(new_len);

472 }

473 return new_uri;

474 }

475 else return uri;

476 }

477

478 // requires that input have a PROTOCOL (http://) for proper behavior

479 // items with the "file" protocol are returned as is (what is the hostname in th at case? C: ? doesn't make sense)

480 // TODO(omaha): loosen requirement

481 // includes http://, e.g. http://www.google.com/

482 CString GetUriHostName(const CString& uri, bool strip_leading) {

483 if (String_StartsWith(uri,NOTRANSL(_T("file:")),true)) return uri;

484

485 // correct any "errors"

486 CString s(GetAbsoluteUri(uri));

487

488 // Strip the leading "www."

489 if (strip_leading)

490 {

491 int index_www = String_FindString(s, kStrLeadingWww);

492 if (index_www != -1)

493 ReplaceCString (s, kStrLeadingWww, _T(""));

494 }

495

496 int i = String_FindString(s, _T("://"));

497 if(i==-1) return uri;

498 int j = String_FindChar(s, L'/',i+3);

499 if(j==-1) return uri;

500 return s.Left(j+1);

501 }

502

503 // requires that input have a PROTOCOL (http://) for proper behavior

504 // TODO(omaha): loosen requirement

505 // removes the http:// and the extra slash '/' at the end.

506 // http://www.google.com/ -> www.google.com (or google.com if strip_leading = tr ue)

507 CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading) {

508 CString s(GetUriHostName(uri,strip_leading));

509

510 // remove protocol

511 int i = String_FindString (s, _T("://"));

512 if(i==-1) return s;

513 CString ss(s.Right (s.GetLength() - i-3));

514

515 // remove the last '/'

516 int j = ss.ReverseFind('/');

517 if (j == -1) return ss;

518 return ss.Left(j);

519 }

520

521 CString AbbreviateUri(const CString& uri, int32 max_len) {

522 ASSERT1(max_len);

523 ASSERT1(!uri.IsEmpty());

524

525 CString s(uri);

526 VERIFY1(String_FindString (s, _T("://")));

527

528 TrimCString(s);

529 // SKIP_LOC_BEGIN

530 RemoveFromStart (s, _T("ftp://"), false);

531 RemoveFromStart (s, _T("http://"), false);

532 RemoveFromStart (s, _T("https://"), false);

533 RemoveFromStart (s, _T("www."), false);

534 RemoveFromStart (s, _T("ftp."), false);

535 RemoveFromStart (s, _T("www-"), false);

536 RemoveFromStart (s, _T("ftp-"), false);

537 RemoveFromEnd (s, _T(".htm"));

538 RemoveFromEnd (s, _T(".html"));

539 RemoveFromEnd (s, _T(".asp"));

540 // SKIP_LOC_END

541 if (s.GetLength() > max_len) {

542 // try to keep the portion after the last /

543 int32 last_slash = s.ReverseFind ((TCHAR)'/');

544 CString after_last_slash;

545 if (last_slash == -1) { after_last_slash = _T(""); }

546 else { after_last_slash = s.Right (uri.GetLength() - last_slash - 1); }

547 if (after_last_slash.GetLength() > max_len / 2) {

548 after_last_slash = after_last_slash.Right (max_len / 2);

549 }

550 s = s.Left (max_len - after_last_slash.GetLength() - 2);

551 s += "..";

552 s += after_last_slash;

553 }

554 return s;

555 }

556

557 // normalized version of a URI intended to map duplicates to the same string

558 // the normalized URI is not a valid URI

559 CString NormalizeUri (const CString & uri) {

560 CString s(uri);

561 TrimCString(s);

562 MakeLowerCString(s);

563 // SKIP_LOC_BEGIN

564 ReplaceCString (s, _T(":80"), _T(""));

565

566 RemoveFromEnd (s, _T("/index.html"));

567 RemoveFromEnd (s, _T("/welcome.html")); // old netscape standard

568 RemoveFromEnd (s, _T("/"));

569

570 RemoveFromStart (s, _T("ftp://"), false);

571 RemoveFromStart (s, _T("http://"), false);

572 RemoveFromStart (s, _T("https://"), false);

573 RemoveFromStart (s, _T("www."), false);

574 RemoveFromStart (s, _T("ftp."), false);

575 RemoveFromStart (s, _T("www-"), false);

576 RemoveFromStart (s, _T("ftp-"), false);

577

578 ReplaceCString (s, _T("/./"), _T("/"));

579 // SKIP_LOC_END

580

581 // TODO(omaha):

582 // fixup URLs like a/b/../../c

583 // while ($s =~ m!\/\.\.\!!) {

584 // $s =~ s!/[^/]*/\.\./!/!;

585 // }

586

587 // TODO(omaha):

588 // unescape characters

589 // Note from RFC1630: "Sequences which start with a percent sign

590 // but are not followed by two hexadecimal characters are reserved

591 // for future extension"

592 // $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;

593

594 return s;

595 }

596

597 CString RemoveInternetProtocolHeader (const CString& url) {

598 int find_colon_slash_slash = String_FindString(url, NOTRANSL(L"://"));

599 if( find_colon_slash_slash != -1 ) {

600 // remove PROTOCOL://

601 return url.Right(url.GetLength() - find_colon_slash_slash - 3);

602 } else if (String_StartsWith(url, NOTRANSL(L"mailto:"), true)) {

603 // remove "mailto:"

604 return url.Right(url.GetLength() - 7);

605 } else {

606 // return as is

607 return url;

608 }

609 }

610

611 HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out) {

612 ASSERT1(path_out);

613 ASSERT1(uri.GetLength() < INTERNET_MAX_URL_LENGTH);

614

615 if (uri.IsEmpty()) {

616 return E_INVALIDARG;

617 }

618

619 DWORD buf_len = MAX_PATH;

620 HRESULT hr = ::PathCreateFromUrl(uri,

621 CStrBuf(*path_out, MAX_PATH),

622 &buf_len,

623 NULL);

624 return hr;

625 }

626

627 void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case) {

628 ASSERT(remove, (L""));

629

630 // Remove the characters if it is the prefix

631 if (String_StartsWith(s, remove, ignore_case))

632 s.Delete(0, lstrlen(remove));

633 }

634

635 bool String_EndsWith(const TCHAR str, const TCHAR end_str, bool ignore_case) {

636 ASSERT(end_str, (L""));

637 ASSERT(str, (L""));

638

639 int str_len = lstrlen(str);

640 int end_len = lstrlen(end_str);

641

642 // Definitely false if the suffix is longer than the string

643 if (end_len > str_len)

644 return false;

645

646 const TCHAR *str_ptr = str + str_len;

647 const TCHAR *end_ptr = end_str + end_len;

648

649 while (end_ptr >= end_str) {

650 // Check for matching characters

651 TCHAR c1 = *str_ptr;

652 TCHAR c2 = *end_ptr;

653

654 if (ignore_case) {

655 c1 = Char_ToLower(c1);

656 c2 = Char_ToLower(c2);

657 }

658

659 if (c1 != c2)

660 return false;

661

662 --str_ptr;

663 --end_ptr;

664 }

665

666 // if we haven't failed out, it must be ok!

667 return true;

668 }

669

670 CString String_MakeEndWith(const TCHAR* str, const TCHAR* end_str, bool ignore_c ase) {

671 if (String_EndsWith(str, end_str, ignore_case)) {

672 return str;

673 } else {

674 CString r(str);

675 r += end_str;

676 return r;

677 }

678 }

679

680 void RemoveFromEnd (CString & s, const TCHAR* remove) {

681 ASSERT(remove, (L""));

682

683 // If the suffix is shorter than the string, don't bother

684 int remove_len = lstrlen(remove);

685 if (s.GetLength() < remove_len) return;

686

687 // If the suffix is equal

688 int suffix_begin = s.GetLength() - remove_len;

689 if (0 == lstrcmp(s.GetString() + suffix_begin, remove))

690 s.Delete(suffix_begin, remove_len);

691 }

692

693 CString ElideIfNeeded (const CString & input_string, int max_len, int min_len) {

694 ASSERT (min_len <= max_len, (L""));

695 ASSERT (max_len >= TSTR_SIZE(kEllipsis)+1, (L""));

696 ASSERT (min_len >= TSTR_SIZE(kEllipsis)+1, (L""));

697

698 CString s = input_string;

699

700 s.TrimRight();

701 if (s.GetLength() > max_len) {

702 int truncate_at = max_len - TSTR_SIZE(kEllipsis);

703 // find first space going backwards from character one after the truncation point

704 while (truncate_at >= min_len && !IsSpace(s.GetAt(truncate_at)))

705 truncate_at--;

706

707 // skip the space(s)

708 while (truncate_at >= min_len && IsSpace(s.GetAt(truncate_at)))

709 truncate_at--;

710

711 truncate_at++;

712

713 if (truncate_at <= min_len \|\| truncate_at > (max_len - static_cast<int>(TSTR _SIZE(kEllipsis)))) {

714 // we weren't able to break at a word boundary, may as well use more of th e string

715 truncate_at = max_len - TSTR_SIZE(kEllipsis);

716

717 // skip space(s)

718 while (truncate_at > 0 && IsSpace(s.GetAt(truncate_at-1)))

719 truncate_at--;

720 }

721

722 s = s.Left(truncate_at);

723 s += kEllipsis;

724 }

725

726 UTIL_LOG(L6, (L"elide (%d %d) %s -> %s", min_len, max_len, input_string, s));

727 return s;

728 }

729

730 // these functions untested

731 // UTF8 parameter supported on XP/2000 only

732 HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len) {

733 ASSERT (dest_len, (L""));

734 ASSERT (dest, (L""));

735 ASSERT (src, (L""));

736

737 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode

738 // then use WideCharToMultiByte to convert from Unicode to UTF8

739 WCHAR unicode = new WCHAR [(src_len + 1) sizeof (TCHAR)]; ASSERT (unicode, (L""));

740 int chars_written = MultiByteToWideChar (CP_ACP, 0, src, src_len, unicode, src _len);

741 ASSERT (chars_written == src_len, (L""));

742 char *unmappable = " ";

743 BOOL unmappable_characters = false;

744 dest_len = WideCharToMultiByte (CP_UTF8, 0, unicode, chars_written, dest, de st_len, unmappable, &unmappable_characters);

745 delete [] unicode;

746 return S_OK;

747 }

748

749 // Convert Wide to ANSI directly. Use only when it is all ANSI

750 CStringA WideToAnsiDirect(const CString & in) {

751 int in_len = in.GetLength();

752 const TCHAR * in_buf = static_cast<const TCHAR*>(in.GetString());

753

754 CStringA out;

755 unsigned char * out_buf = (unsigned char *)out.GetBufferSetLength(in_len);

756

757 for(int i = 0; i < in_len; ++i)

758 out_buf[i] = static_cast<unsigned char>(in_buf[i]);

759

760 out.ReleaseBuffer(in_len);

761 return out;

762 }

763

764 HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len) {

765 ASSERT(dest_len, (L""));

766 ASSERT(dest, (L""));

767

768 dest_len = WideCharToMultiByte (CP_UTF8, 0, src, src_len, dest, dest_len, NU LL,NULL);

769 return S_OK;

770 }

771

772 HRESULT UTF8ToUCS2 (const char * src, int src_len, LPWSTR dest, int *dest_len) {

773 ASSERT (dest_len, (L""));

774 ASSERT (src, (L""));

775

776 dest_len = MultiByteToWideChar (CP_UTF8, 0, src, src_len, dest, dest_len);

777 ASSERT (*dest_len == src_len, (L""));

778 return S_OK;

779 }

780

781 HRESULT UTF8ToAnsi (char * src, int, char * dest, int *dest_len) {

782 ASSERT(dest_len, (L""));

783 ASSERT(dest, (L""));

784 ASSERT(src, (L""));

785

786 src; dest; dest_len; // unreferenced formal parameter

787

788 // First use MultiByteToWideChar(CP_UTF8, ...) to convert to Unicode

789 // then use WideCharToMultiByte to convert from Unicode to ANSI

790 return E_FAIL;

791 }

792

793 // clean up a string so it can be included within a JavaScript string

794 // mainly involves escaping characters

795 CString SanitizeString(const CString & in, DWORD mode) {

796 CString out(in);

797

798 if (mode & kSanHtml) {

799 // SKIP_LOC_BEGIN

800 ReplaceCString(out, _T("&"), _T("&"));

801 ReplaceCString(out, _T("<"), _T("<"));

802 ReplaceCString(out, _T(">"), _T(">"));

803 // SKIP_LOC_END

804 }

805

806 if ((mode & kSanXml) == kSanXml) {

807 // SKIP_LOC_BEGIN

808 ReplaceCString(out, _T("'"), _T("'"));

809 ReplaceCString(out, _T("\""), _T("""));

810 // SKIP_LOC_END

811 }

812

813 // Note that this SAN_JAVASCRIPT and kSanXml should not be used together.

814 ASSERT ((mode & (kSanJs \| kSanXml)) != (kSanJs \| kSanXml), (L""));

815

816 if ((mode & kSanJs) == kSanJs) {

817 // SKIP_LOC_BEGIN

818 ReplaceCString(out, _T("\\"), _T("\\\\"));

819 ReplaceCString(out, _T("\'"), _T("\\\'"));

820 ReplaceCString(out, _T("\""), _T("\\\""));

821 ReplaceCString(out, _T("\n"), _T(" "));

822 ReplaceCString(out, _T("\t"), _T(" "));

823 // SKIP_LOC_END

824 }

825

826 if ((mode & kSanHtmlInput) == kSanHtmlInput) {

827 // SKIP_LOC_BEGIN

828 ReplaceCString(out, _T("\""), _T("""));

829 ReplaceCString(out, _T("'"), _T("'"));

830 // SKIP_LOC_END

831 }

832

833 return out;

834 }

835

836 // Bolds the periods used for abbreviation. Call this after HighlightTerms.

837 CString BoldAbbreviationPeriods(const CString & in) {

838 CString out(in);

839 CString abbrev;

840 for (int i = 0; i < kAbbreviationPeriodLength; ++i)

841 abbrev += _T(".");

842 ReplaceCString(out, abbrev, NOTRANSL(_T("<b>")) + abbrev + NOTRANSL(_T("</b>") ));

843 return out;

844 }

845

846 // Unescape a escaped sequence leading by a percentage symbol '%',

847 // and converted the unescaped sequence (in UTF8) into unicode.

848 // Inputs: src is the input string.

849 // pos is the starting position.

850 // Returns: true if a EOS(null) char was encounted.

851 // out contains the unescaped and converted unicode string.

852 // consumed_length is how many bytes in the src string have been

853 // unescaped.

854 // We can avoid the expensive UTF8 conversion step if there are no higher

855 // ansi characters So if there aren't any, just convert it ANSI-to-WIDE

856 // directly, which is cheaper.

857 inline bool UnescapeSequence(const CString &src, int pos,

858 CStringW out, int consumed_length) {

859 ASSERT1(out);

860 ASSERT1(consumed_length);

861

862 int length = src.GetLength();

863 // (input_len - pos) / 3 is enough for un-escaping the (%xx)+ sequences.

864 int max_dst_length = (length - pos) / 3;

865 scoped_array<char> unescaped(new char[max_dst_length]);

866 char *buf = unescaped.get();

867 if (buf == NULL) { // no enough space ???

868 *consumed_length = 0;

869 return false;

870 }

871 char *dst = buf;

872 bool is_utf8 = false;

873 // It is possible that there is a null character '\0' in the sequence.

874 // Because the CStringT does't support '\0' in it, we stop

875 // parsing the input string when it is encounted.

876 bool eos_encounted = false;

877 uint8 ch;

878 int s = pos;

879 while (s + 2 < length && src[s] == '%' && !eos_encounted &&

880 ExtractChar(src, s + 1, &ch)) {

881 if (ch != 0)

882 *dst++ = ch;

883 else

884 eos_encounted = true;

885 if (ch >= 128)

886 is_utf8 = true;

887 s += 3;

888 }

889

890 ASSERT1(dst <= buf + max_dst_length); // just to make sure

891

892 *consumed_length = s - pos;

893 if (is_utf8)

894 AnsiToWideString(buf, dst - buf, CP_UTF8, out);

895 else

896 *out = AnsiToWideString(buf, dst - buf);

897 return eos_encounted;

898 }

899

900 // There is an encoding called "URL-encoding". This function takes a URL-encoded string

901 // and converts it back to the original representation

902 // example: "?q=moon+doggy_%25%5E%26&" = "moon doggy_%^&"

903 CString Unencode(const CString &input) {

904 const int input_len = input.GetLength();

905 const TCHAR *src = input.GetString();

906 // input_len is enough for containing the unencoded string.

907 CString out;

908 TCHAR *head = out.GetBuffer(input_len);

909 TCHAR *dst = head;

910 int s = 0;

911 bool eos_encounted = false;

912 bool is_utf8 = false;

913 CStringW fragment;

914 int consumed_length = 0;

915 while (s < input_len && !eos_encounted) {

916 switch (src[s]) {

917 case '+' :

918 *dst++ = ' ';

919 ASSERT1(dst <= head + input_len);

920 ++s;

921 break;

922 case '%' :

923 eos_encounted =

924 UnescapeSequence(input, s, &fragment, &consumed_length);

925 if (consumed_length > 0) {

926 s += consumed_length;

927 ASSERT1(dst + fragment.GetLength() <= head + input_len);

928 for (int i = 0; i < fragment.GetLength(); ++i)

929 *dst++ = fragment[i];

930 } else {

931 *dst++ = src[s++];

932 ASSERT1(dst <= head + input_len);

933 }

934 break;

935 default:

936 *dst++ = src[s];

937 ASSERT1(dst <= head + input_len);

938 ++s;

939 }

940 }

941 int out_len = dst - head;

942 out.ReleaseBuffer(out_len);

943 return out;

944 }

945

946 CString GetTextInbetween(const CString &input, const CString &start, const CStri ng &end) {

947 int start_index = String_FindString(input, start);

948 if (start_index == -1)

949 return L"";

950

951 start_index += start.GetLength();

952 int end_index = String_FindString(input, end, start_index);

953 if (end_index == -1)

954 return L"";

955

956 return input.Mid(start_index, end_index - start_index);

957 }

958

959 // Given a string, get the parameter and url-unencode it

960 CString GetParam(const CString & input, const CString & key) {

961 CString my_key(_T("?"));

962 my_key.Append(key);

963 my_key += L'=';

964

965 return Unencode(GetTextInbetween(input, my_key, NOTRANSL(L"?")));

966 }

967

968 // Get an xml-like field from a string

969 CString GetField (const CString & input, const CString & field) {

970 CString start_field(NOTRANSL(_T("<")));

971 start_field += field;

972 start_field += L'>';

973

974 int32 start = String_FindString(input, start_field);

975 if (start == -1) { return _T(""); }

976 start += 2 + lstrlen (field);

977

978 CString end_field(NOTRANSL(_T("</")));

979 end_field += field;

980 end_field += L'>';

981

982 int32 end = String_FindString(input, end_field);

983 if (end == -1) { return _T(""); }

984

985 return input.Mid (start, end - start);

986 }

987

988 // ------------------------------------------------------------

989 // Finds a whole word match in the query.

990 // If the word has non-spaces either before or after, it will not qualify as

991 // a match. i.e. "pie!" is not a match because of the exclamation point.

992 // TODO(omaha): Add parameter that will consider punctuation acceptable.

993 //

994 // Optionally will look for a colon at the end.

995 // If not found, return -1.

996 int FindWholeWordMatch (const CString &query,

997 const CString &word_to_match,

998 const bool end_with_colon,

999 const int index_begin) {

1000 if (word_to_match.IsEmpty()) {

1001 return -1;

1002 }

1003

1004 int index_word_begin = index_begin;

1005

1006 // Keep going until we find a whole word match, or the string ends.

1007 do {

1008 index_word_begin = String_FindString (query, word_to_match, index_word_begin );

1009

1010 if (-1 == index_word_begin) {

1011 return index_word_begin;

1012 }

1013

1014 // If it's not a whole word match, keep going.

1015 if (index_word_begin > 0 &&

1016 !IsSpaceW (query[index_word_begin - 1])) {

1017 goto LoopEnd;

1018 }

1019

1020 if (end_with_colon) {

1021 int index_colon = String_FindChar (query, L':', index_word_begin);

1022

1023 // If there is no colon in the string, return now.

1024 if (-1 == index_colon) {

1025 return -1;

1026 }

1027

1028 // If there is text between the end of the word and the colon, keep going.

1029 if (index_colon - index_word_begin != word_to_match.GetLength()) {

1030 goto LoopEnd;

1031 }

1032 } else {

1033 // If there are more chars left after this word/phrase, and

1034 // they are not spaces, return.

1035 if (query.GetLength() > index_word_begin + word_to_match.GetLength() &&

1036 !IsSpaceW (query.GetAt (index_word_begin + word_to_match.GetLength()))) {

1037 goto LoopEnd;

1038 }

1039 }

1040

1041 // It fits all the requirements, so return the index to the beginning of the word.

1042 return index_word_begin;

1043

1044 LoopEnd:

1045 ++index_word_begin;

1046

1047 } while (-1 != index_word_begin);

1048

1049 return index_word_begin;

1050 }

1051

1052 // --------------------------------------------------------

1053 // Do whole-word replacement in "str".

1054 void ReplaceWholeWord (const CString &string_to_replace,

1055 const CString &replacement,

1056 const bool trim_whitespace,

1057 CString *str) {

1058 ASSERT (str, (L"ReplaceWholeWord"));

1059

1060 if (string_to_replace.IsEmpty() \|\| str->IsEmpty()) {

1061 return;

1062 }

1063

1064 int index_str = 0;

1065 do {

1066 index_str = FindWholeWordMatch (*str, string_to_replace, false, index_str);

1067

1068 if (-1 != index_str) {

1069 // Get the strings before and after, and trim whitespace.

1070 CString str_before_word(str->Left (index_str));

1071 if (trim_whitespace) {

1072 str_before_word.TrimRight();

1073 }

1074

1075 CString str_after_word(str->Mid (index_str + string_to_replace.GetLength() ));

1076 if (trim_whitespace) {

1077 str_after_word.TrimLeft();

1078 }

1079

1080 *str = str_before_word + replacement + str_after_word;

1081 index_str += replacement.GetLength() + 1;

1082 }

1083 } while (index_str != -1);

1084 }

1085

1086 // --------------------------------------------------------

1087 // Reverse (big-endian<->little-endian) the shorts that make up

1088 // Unicode characters in a byte array of Unicode chars

1089 HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes) {

1090 ASSERT (unicode_string, (L""));

1091

1092 // If odd # of bytes, just leave the last one alone

1093 for (int i = 0; i < size_in_bytes - 1; i += 2) {

1094 byte b = unicode_string[i];

1095 unicode_string[i] = unicode_string[i+1];

1096 unicode_string[i+1] = b;

1097 }

1098

1099 return S_OK;

1100 }

1101

1102 // case insensitive strstr

1103 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c

1104 const char stristr(const char string, const char *pattern)

1105 {

1106 ASSERT (pattern, (L""));

1107 ASSERT (string, (L""));

1108 ASSERT (string && pattern, (L""));

1109 char pattern_ptr, string_ptr;

1110 const char *start;

1111

1112 for (start = string; *start != 0; start++)

1113 {

1114 // find start of pattern in string

1115 for ( ; ((start!=0) && (String_ToUpperA(start) != String_ToUpperA(*pattern ))); start++)

1116 ;

1117 if (0 == *start)

1118 return NULL;

1119

1120 pattern_ptr = (char *)pattern;

1121 string_ptr = (char *)start;

1122

1123 while (String_ToUpperA(string_ptr) == String_ToUpperA(pattern_ptr))

1124 {

1125 string_ptr++;

1126 pattern_ptr++;

1127

1128 // if end of pattern then pattern was found

1129 if (0 == *pattern_ptr)

1130 return (start);

1131 }

1132 }

1133

1134 return NULL;

1135 }

1136

1137 // case insensitive Unicode strstr

1138 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c

1139 const WCHAR stristrW(const WCHAR string, const WCHAR *pattern)

1140 {

1141 ASSERT (pattern, (L""));

1142 ASSERT (string, (L""));

1143 ASSERT (string && pattern, (L""));

1144 const WCHAR *start;

1145

1146 for (start = string; *start != 0; start++)

1147 {

1148 // find start of pattern in string

1149 for ( ; ((start!=0) && (String_ToUpper(start) != String_ToUpper(*pattern)) ); start++)

1150 ;

1151 if (0 == *start)

1152 return NULL;

1153

1154 const WCHAR *pattern_ptr = pattern;

1155 const WCHAR *string_ptr = start;

1156

1157 while (String_ToUpper(string_ptr) == String_ToUpper(pattern_ptr))

1158 {

1159 string_ptr++;

1160 pattern_ptr++;

1161

1162 // if end of pattern then pattern was found

1163 if (0 == *pattern_ptr)

1164 return (start);

1165 }

1166 }

1167

1168 return NULL;

1169 }

1170

1171 // case sensitive Unicode strstr

1172 // adapted from http://c.snippets.org/snip_lister.php?fname=stristr.c

1173 const WCHAR strstrW(const WCHAR string, const WCHAR *pattern)

1174 {

1175 ASSERT (pattern, (L""));

1176 ASSERT (string, (L""));

1177 ASSERT (string && pattern, (L""));

1178 const WCHAR *start;

1179

1180 for (start = string; *start != 0; start++)

1181 {

1182 // find start of pattern in string

1183 for ( ; ((start!=0) && (start != *pattern)); start++)

1184 ;

1185 if (0 == *start)

1186 return NULL;

1187

1188 const WCHAR *pattern_ptr = pattern;

1189 const WCHAR *string_ptr = start;

1190

1191 while (string_ptr == pattern_ptr)

1192 {

1193 string_ptr++;

1194 pattern_ptr++;

1195

1196 // if end of pattern then pattern was found

1197 if (0 == *pattern_ptr)

1198 return (start);

1199 }

1200 }

1201

1202 return NULL;

1203 }

1204

1205 // -------------------------------------------------------------------------

1206 // Helper function

1207 float GetLenWithWordWrap (const float len_so_far,

1208 const float len_to_add,

1209 const uint32 len_line) {

1210 // lint -save -e414 Possible division by 0

1211 ASSERT (len_line != 0, (L""));

1212

1213 float len_total = len_so_far + len_to_add;

1214

1215 // Figure out if we need to word wrap by seeing if adding the second

1216 // string will cause us to span more lines than before.

1217 uint32 num_lines_before = static_cast<uint32> (len_so_far / len_line);

1218 uint32 num_lines_after = static_cast<uint32> (len_total / len_line);

1219

1220 // If it just barely fit onto the line, do not wrap to the next line.

1221 if (num_lines_after > 0 && (len_total / len_line - num_lines_after == 0)) {

1222 --num_lines_after;

1223 }

1224

1225 if (num_lines_after > num_lines_before) {

1226 // Need to word wrap.

1227 // lint -e{790} Suspicious truncation

1228 return num_lines_after * len_line + len_to_add;

1229 }

1230 else

1231 return len_total;

1232

1233 // lint -restore

1234 }

1235

1236 int CalculateBase64EscapedLen(int input_len, bool do_padding) {

1237 // these formulae were copied from comments that used to go with the base64

1238 // encoding functions

1239 int intermediate_result = 8 * input_len + 5;

1240 ASSERT(intermediate_result > 0,(L"")); // make sure we didn't overflow

1241 int len = intermediate_result / 6;

1242 if (do_padding) len = ((len + 3) / 4) * 4;

1243 return len;

1244 }

1245

1246 // Base64Escape does padding, so this calculation includes padding.

1247 int CalculateBase64EscapedLen(int input_len) {

1248 return CalculateBase64EscapedLen(input_len, true);

1249 }

1250

1251 // Base64Escape

1252 // Largely based on b2a_base64 in google/docid_encryption.c

1253 //

1254 //

1255 int Base64EscapeInternal(const char *src, int szsrc,

1256 char dest, int szdest, const char base64,

1257 bool do_padding)

1258 {

1259 ASSERT(base64, (L""));

1260 ASSERT(dest, (L""));

1261 ASSERT(src, (L""));

1262

1263 static const char kPad64 = '=';

1264

1265 if (szsrc <= 0) return 0;

1266

1267 char *cur_dest = dest;

1268 const unsigned char cur_src = reinterpret_cast<const unsigned char>(src);

1269

1270 // Three bytes of data encodes to four characters of cyphertext.

1271 // So we can pump through three-byte chunks atomically.

1272 while (szsrc > 2) { /* keep going until we have less than 24 bits */

1273 if( (szdest -= 4) < 0 ) return 0;

1274 cur_dest[0] = base64[cur_src[0] >> 2];

1275 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];

1276 cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)];

1277 cur_dest[3] = base64[cur_src[2] & 0x3f];

1278

1279 cur_dest += 4;

1280 cur_src += 3;

1281 szsrc -= 3;

1282 }

1283

1284 /* now deal with the tail (<=2 bytes) */

1285 switch (szsrc) {

1286 case 0:

1287 // Nothing left; nothing more to do.

1288 break;

1289 case 1:

1290 // One byte left: this encodes to two characters, and (optionally)

1291 // two pad characters to round out the four-character cypherblock.

1292 if( (szdest -= 2) < 0 ) return 0;

1293 cur_dest[0] = base64[cur_src[0] >> 2];

1294 cur_dest[1] = base64[(cur_src[0] & 0x03) << 4];

1295 cur_dest += 2;

1296 if (do_padding) {

1297 if( (szdest -= 2) < 0 ) return 0;

1298 cur_dest[0] = kPad64;

1299 cur_dest[1] = kPad64;

1300 cur_dest += 2;

1301 }

1302 break;

1303 case 2:

1304 // Two bytes left: this encodes to three characters, and (optionally)

1305 // one pad character to round out the four-character cypherblock.

1306 if( (szdest -= 3) < 0 ) return 0;

1307 cur_dest[0] = base64[cur_src[0] >> 2];

1308 cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];

1309 cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2];

1310 cur_dest += 3;

1311 if (do_padding) {

1312 if( (szdest -= 1) < 0 ) return 0;

1313 cur_dest[0] = kPad64;

1314 cur_dest += 1;

1315 }

1316 break;

1317 default:

1318 // Should not be reached: blocks of 3 bytes are handled

1319 // in the while loop before this switch statement.

1320 ASSERT(false, (L"Logic problem? szsrc = %S",szsrc));

1321 break;

1322 }

1323 return (cur_dest - dest);

1324 }

1325

// Standard base64 alphabet: value i (0-63) is encoded as character i.
#define kBase64Chars \
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

// Web-safe base64 alphabet: '-' and '_' take the place of '+' and '/'.
#define kWebSafeBase64Chars \
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

1329

1330 int Base64Escape(const char src, int szsrc, char dest, int szdest) {

1331 ASSERT(dest, (L""));

1332 ASSERT(src, (L""));

1333

1334 return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);

1335 }

1336 int WebSafeBase64Escape(const char src, int szsrc, char dest,

1337 int szdest, bool do_padding) {

1338 ASSERT(dest, (L""));

1339 ASSERT(src, (L""));

1340

1341 return Base64EscapeInternal(src, szsrc, dest, szdest,

1342 kWebSafeBase64Chars, do_padding);

1343 }

1344

1345 void Base64Escape(const char *src, int szsrc,

1346 CStringA* dest, bool do_padding)

1347 {

1348 ASSERT(src, (L""));

1349 ASSERT(dest,(L""));

1350 const int max_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding);

1351 dest->Empty();

1352 const int escaped_len = Base64EscapeInternal(src, szsrc,

1353 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,

1354 kBase64Chars,

1355 do_padding);

1356 ASSERT(max_escaped_size <= escaped_len,(L""));

1357 dest->ReleaseBuffer(escaped_len);

1358 }

1359

1360 void WebSafeBase64Escape(const char *src, int szsrc,

1361 CStringA *dest, bool do_padding)

1362 {

1363 ASSERT(src, (L""));

1364 ASSERT(dest,(L""));

1365 const int max_escaped_size =

1366 CalculateBase64EscapedLen(szsrc, do_padding);

1367 dest->Empty();

1368 const int escaped_len = Base64EscapeInternal(src, szsrc,

1369 dest->GetBufferSetLength(max_escaped_size + 1), max_escaped_size + 1,

1370 kWebSafeBase64Chars,

1371 do_padding);

1372 ASSERT(max_escaped_size <= escaped_len,(L""));

1373 dest->ReleaseBuffer(escaped_len);

1374 }

1375

1376 void WebSafeBase64Escape(const CStringA& src, CStringA* dest) {

1377 ASSERT(dest,(L""));

1378 int encoded_len = CalculateBase64EscapedLen(src.GetLength());

1379 scoped_array<char> buf(new char[encoded_len]);

1380 int len = WebSafeBase64Escape(src,src.GetLength(), buf.get(), encoded_len, fal se);

1381 dest->SetString(buf.get(), len);

1382 }

1383

1384 // ----------------------------------------------------------------------

1385 // int Base64Unescape() - base64 decoder

1386 //

1387 // Check out

1388 // http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for formal

1389 // description, but what we care about is that...

1390 // Take the encoded stuff in groups of 4 characters and turn each

1391 // character into a code 0 to 63 thus:

1392 // A-Z map to 0 to 25

1393 // a-z map to 26 to 51

1394 // 0-9 map to 52 to 61

1395 // +(- for WebSafe) maps to 62

1396 // /(_ for WebSafe) maps to 63

1397 // There will be four numbers, all less than 64 which can be represented

1398 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).

1399 // Arrange the 6 digit binary numbers into three bytes as such:

1400 // aaaaaabb bbbbcccc ccdddddd

1401 // Equals signs (one or two) are used at the end of the encoded block to

1402 // indicate that the text was not an integer multiple of three bytes long.

1403 // ----------------------------------------------------------------------

1404 int Base64UnescapeInternal(const char *src, int len_src,

1405 char dest, int len_dest, const char unbase64) {

1406 ASSERT (unbase64, (L""));

1407 ASSERT (src, (L""));

1408

1409 static const char kPad64 = '=';

1410

1411 int decode;

1412 int destidx = 0;

1413 int state = 0;

1414 // Used an unsigned char, since ch is used as an array index (into unbase64).

1415 unsigned char ch = 0;

1416 while (len_src-- && (ch = *src++) != '\0') {

1417 if (IsSpaceA(ch)) // Skip whitespace

1418 continue;

1419

1420 if (ch == kPad64)

1421 break;

1422

1423 decode = unbase64[ch];

1424 if (decode == 99) // A non-base64 character

1425 return (-1);

1426

1427 // Four cyphertext characters decode to three bytes.

1428 // Therefore we can be in one of four states.

1429 switch (state) {

1430 case 0:

1431 // We're at the beginning of a four-character cyphertext block.

1432 // This sets the high six bits of the first byte of the

1433 // plaintext block.

1434 if (dest) {

1435 if (destidx >= len_dest)

1436 return (-1);

1437 // lint -e{734} Loss of precision

1438 dest[destidx] = static_cast<char>(decode << 2);

1439 }

1440 state = 1;

1441 break;

1442 case 1:

1443 // We're one character into a four-character cyphertext block.

1444 // This sets the low two bits of the first plaintext byte,

1445 // and the high four bits of the second plaintext byte.

1446 // However, if this is the end of data, and those four

1447 // bits are zero, it could be that those four bits are

1448 // leftovers from the encoding of data that had a length

1449 // of one mod three.

1450 if (dest) {

1451 if (destidx >= len_dest)

1452 return (-1);

1453 // lint -e{734} Loss of precision

1454 dest[destidx] \|= decode >> 4;

1455 if (destidx + 1 >= len_dest) {

1456 if (0 != (decode & 0x0f))

1457 return (-1);

1458 else

1459 ;

1460 } else {

1461 // lint -e{734} Loss of precision

1462 dest[destidx+1] = static_cast<char>((decode & 0x0f) << 4);

1463 }

1464 }

1465 destidx++;

1466 state = 2;

1467 break;

1468 case 2:

1469 // We're two characters into a four-character cyphertext block.

1470 // This sets the low four bits of the second plaintext

1471 // byte, and the high two bits of the third plaintext byte.

1472 // However, if this is the end of data, and those two

1473 // bits are zero, it could be that those two bits are

1474 // leftovers from the encoding of data that had a length

1475 // of two mod three.

1476 if (dest) {

1477 if (destidx >= len_dest)

1478 return (-1);

1479 // lint -e{734} Loss of precision

1480 dest[destidx] \|= decode >> 2;

1481 if (destidx +1 >= len_dest) {

1482 if (0 != (decode & 0x03))

1483 return (-1);

1484 else

1485 ;

1486 } else {

1487 // lint -e{734} Loss of precision

1488 dest[destidx+1] = static_cast<char>((decode & 0x03) << 6);

1489 }

1490 }

1491 destidx++;

1492 state = 3;

1493 break;

1494 case 3:

1495 // We're at the last character of a four-character cyphertext block.

1496 // This sets the low six bits of the third plaintext byte.

1497 if (dest) {

1498 if (destidx >= len_dest)

1499 return (-1);

1500 // lint -e{734} Loss of precision

1501 dest[destidx] \|= decode;

1502 }

1503 destidx++;

1504 state = 0;

1505 break;

1506

1507 default:

1508 ASSERT (false, (L""));

1509 break;

1510 }

1511 }

1512

1513 // We are done decoding Base-64 chars. Let's see if we ended

1514 // on a byte boundary, and/or with erroneous trailing characters.

1515 if (ch == kPad64) { // We got a pad char

1516 if ((state == 0) \|\| (state == 1))

1517 return (-1); // Invalid '=' in first or second position

1518 if (len_src == 0) {

1519 if (state == 2) // We run out of input but we still need another '='

1520 return (-1);

1521 // Otherwise, we are in state 3 and only need this '='

1522 } else {

1523 if (state == 2) { // need another '='

1524 while ((ch = *src++) != '\0' && (len_src-- > 0)) {

1525 if (!IsSpaceA(ch))

1526 break;

1527 }

1528 if (ch != kPad64)

1529 return (-1);

1530 }

1531 // state = 1 or 2, check if all remain padding is space

1532 while ((ch = *src++) != '\0' && (len_src-- > 0)) {

1533 if (!IsSpaceA(ch))

1534 return(-1);

1535 }

1536 }

1537 } else {

1538 // We ended by seeing the end of the string. Make sure we

1539 // have no partial bytes lying around. Note that we

1540 // do not require trailing '=', so states 2 and 3 are okay too.

1541 if (state == 1)

1542 return (-1);

1543 }

1544

1545 return (destidx);

1546 }

1547

1548 int Base64Unescape(const char src, int len_src, char dest, int len_dest) {

1549 ASSERT(dest, (L""));

1550 ASSERT(src, (L""));

1551

1552 static const char UnBase64[] = {

1553 99, 99, 99, 99, 99, 99, 99, 99,

1554 99, 99, 99, 99, 99, 99, 99, 99,

1555 99, 99, 99, 99, 99, 99, 99, 99,

1556 99, 99, 99, 99, 99, 99, 99, 99,

1557 99, 99, 99, 99, 99, 99, 99, 99,

1558 99, 99, 99, 62/+/, 99, 99, 99, 63// /,

1559 52/0/, 53/1/, 54/2/, 55/3/, 56/4/, 57/5/, 58/6/, 59/7/,

1560 60/8/, 61/9/, 99, 99, 99, 99, 99, 99,

1561 99, 0/A/, 1/B/, 2/C/, 3/D/, 4/E/, 5/F/, 6/G/,

1562 7/H/, 8/I/, 9/J/, 10/K/, 11/L/, 12/M/, 13/N/, 14/O/,

1563 15/P/, 16/Q/, 17/R/, 18/S/, 19/T/, 20/U/, 21/V/, 22/W/,

1564 23/X/, 24/Y/, 25/Z/, 99, 99, 99, 99, 99,

1565 99, 26/a/, 27/b/, 28/c/, 29/d/, 30/e/, 31/f/, 32/g/,

1566 33/h/, 34/i/, 35/j/, 36/k/, 37/l/, 38/m/, 39/n/, 40/o/,

1567 41/p/, 42/q/, 43/r/, 44/s/, 45/t/, 46/u/, 47/v/, 48/w/,

1568 49/x/, 50/y/, 51/z/, 99, 99, 99, 99, 99,

1569 99, 99, 99, 99, 99, 99, 99, 99,

1570 99, 99, 99, 99, 99, 99, 99, 99,

1571 99, 99, 99, 99, 99, 99, 99, 99,

1572 99, 99, 99, 99, 99, 99, 99, 99,

1573 99, 99, 99, 99, 99, 99, 99, 99,

1574 99, 99, 99, 99, 99, 99, 99, 99,

1575 99, 99, 99, 99, 99, 99, 99, 99,

1576 99, 99, 99, 99, 99, 99, 99, 99,

1577 99, 99, 99, 99, 99, 99, 99, 99,

1578 99, 99, 99, 99, 99, 99, 99, 99,

1579 99, 99, 99, 99, 99, 99, 99, 99,

1580 99, 99, 99, 99, 99, 99, 99, 99,

1581 99, 99, 99, 99, 99, 99, 99, 99,

1582 99, 99, 99, 99, 99, 99, 99, 99,

1583 99, 99, 99, 99, 99, 99, 99, 99,

1584 99, 99, 99, 99, 99, 99, 99, 99

1585 };

1586

1587 // The above array was generated by the following code

1588 // #include <sys/time.h>

1589 // #include <stdlib.h>

1590 // #include <string.h>

1591 // main()

1592 // {

1593 // static const char Base64[] =

1594 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

1595 // char *pos;

1596 // int idx, i, j;

1597 // printf(" ");

1598 // for (i = 0; i < 255; i += 8) {

1599 // for (j = i; j < i + 8; j++) {

1600 // pos = strchr(Base64, j);

1601 // if ((pos == NULL) \|\| (j == 0))

1602 // idx = 99;

1603 // else

1604 // idx = pos - Base64;

1605 // if (idx == 99)

1606 // printf(" %2d, ", idx);

1607 // else

1608 // printf(" %2d/%c/,", idx, j);

1609 // }

1610 // printf("\n ");

1611 // }

1612 // }

1613

1614 return Base64UnescapeInternal(src, len_src, dest, len_dest, UnBase64);

1615 }

1616

1617 int WebSafeBase64Unescape(const char src, int szsrc, char dest, int szdest) {

1618 ASSERT(dest, (L""));

1619 ASSERT(src, (L""));

1620

1621 static const char UnBase64[] = {

1622 99, 99, 99, 99, 99, 99, 99, 99,

1623 99, 99, 99, 99, 99, 99, 99, 99,

1624 99, 99, 99, 99, 99, 99, 99, 99,

1625 99, 99, 99, 99, 99, 99, 99, 99,

1626 99, 99, 99, 99, 99, 99, 99, 99,

1627 99, 99, 99, 99, 99, 62/-/, 99, 99,

1628 52/0/, 53/1/, 54/2/, 55/3/, 56/4/, 57/5/, 58/6/, 59/7/,

1629 60/8/, 61/9/, 99, 99, 99, 99, 99, 99,

1630 99, 0/A/, 1/B/, 2/C/, 3/D/, 4/E/, 5/F/, 6/G/,

1631 7/H/, 8/I/, 9/J/, 10/K/, 11/L/, 12/M/, 13/N/, 14/O/,

1632 15/P/, 16/Q/, 17/R/, 18/S/, 19/T/, 20/U/, 21/V/, 22/W/,

1633 23/X/, 24/Y/, 25/Z/, 99, 99, 99, 99, 63/_/,

1634 99, 26/a/, 27/b/, 28/c/, 29/d/, 30/e/, 31/f/, 32/g/,

1635 33/h/, 34/i/, 35/j/, 36/k/, 37/l/, 38/m/, 39/n/, 40/o/,

1636 41/p/, 42/q/, 43/r/, 44/s/, 45/t/, 46/u/, 47/v/, 48/w/,

1637 49/x/, 50/y/, 51/z/, 99, 99, 99, 99, 99,

1638 99, 99, 99, 99, 99, 99, 99, 99,

1639 99, 99, 99, 99, 99, 99, 99, 99,

1640 99, 99, 99, 99, 99, 99, 99, 99,

1641 99, 99, 99, 99, 99, 99, 99, 99,

1642 99, 99, 99, 99, 99, 99, 99, 99,

1643 99, 99, 99, 99, 99, 99, 99, 99,

1644 99, 99, 99, 99, 99, 99, 99, 99,

1645 99, 99, 99, 99, 99, 99, 99, 99,

1646 99, 99, 99, 99, 99, 99, 99, 99,

1647 99, 99, 99, 99, 99, 99, 99, 99,

1648 99, 99, 99, 99, 99, 99, 99, 99,

1649 99, 99, 99, 99, 99, 99, 99, 99,

1650 99, 99, 99, 99, 99, 99, 99, 99,

1651 99, 99, 99, 99, 99, 99, 99, 99,

1652 99, 99, 99, 99, 99, 99, 99, 99,

1653 99, 99, 99, 99, 99, 99, 99, 99

1654 };

1655 // The above array was generated by the following code

1656 // #include <sys/time.h>

1657 // #include <stdlib.h>

1658 // #include <string.h>

1659 // main()

1660 // {

1661 // static const char Base64[] =

1662 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

1663 // char *pos;

1664 // int idx, i, j;

1665 // printf(" ");

1666 // for (i = 0; i < 255; i += 8) {

1667 // for (j = i; j < i + 8; j++) {

1668 // pos = strchr(Base64, j);

1669 // if ((pos == NULL) \|\| (j == 0))

1670 // idx = 99;

1671 // else

1672 // idx = pos - Base64;

1673 // if (idx == 99)

1674 // printf(" %2d, ", idx);

1675 // else

1676 // printf(" %2d/%c/,", idx, j);

1677 // }

1678 // printf("\n ");

1679 // }

1680 // }

1681

1682 return Base64UnescapeInternal(src, szsrc, dest, szdest, UnBase64);

1683 }

1684

1685 bool IsHexDigit (WCHAR c) {

1686 return (((c >= L'a') && (c <= L'f'))

1687 \|\| ((c >= L'A') && (c <= L'F'))

1688 \|\| ((c >= L'0') && (c <= L'9')));

1689 }

1690

1691 int HexDigitToInt (WCHAR c) {

1692 return ((c >= L'a') ? ((c - L'a') + 10) :

1693 (c >= L'A') ? ((c - L'A') + 10) :

1694 (c - L'0'));

1695 }

1696

1697 // ----------------------------------------------------------------------

1698 // int QuotedPrintableUnescape()

1699 //

1700 // Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for

1701 // more details, only briefly implemented. But from the web...

1702 // Quoted-printable is an encoding method defined in the MIME

1703 // standard. It is used primarily to encode 8-bit text (such as text

1704 // that includes foreign characters) into 7-bit US ASCII, creating a

1705 // document that is mostly readable by humans, even in its encoded

1706 // form. All MIME compliant applications can decode quoted-printable

1707 // text, though they may not necessarily be able to properly display the

1708 // document as it was originally intended. As quoted-printable encoding

1709 // is implemented most commonly, printable ASCII characters (values 33

1710 // through 126, excluding 61), tabs and spaces that do not appear at the

1711 // end of lines, and end-of-line characters are not encoded. Other

1712 // characters are represented by an equal sign (=) immediately followed

1713 // by that character's hexadecimal value. Lines that are longer than 76

1714 // characters are shortened by line breaks, with the equal sign marking

1715 // where the breaks occurred.

1716 //

1717 // Update: we really want QuotedPrintableUnescape to conform to rfc2047,

1718 // which expands the q encoding. In particular, it specifies that _'s are

1719 // to be treated as spaces.

1720 // ----------------------------------------------------------------------

1721 int QuotedPrintableUnescape(const WCHAR *source, int slen,

1722 WCHAR *dest, int len_dest) {

1723 ASSERT(dest, (L""));

1724 ASSERT(source, (L""));

1725

1726 WCHAR* d = dest;

1727 const WCHAR* p = source;

1728

1729 while (*p != '\0' && p < source+slen && d < dest+len_dest) {

1730 switch (*p) {

1731 case '=':

1732 if (p == source+slen-1) {

1733 // End of line, no need to print the =..

1734 return (d-dest);

1735 }

1736 // if its valid, convert to hex and insert

1737 if (p < source+slen-2 && IsHexDigit(p[1]) && IsHexDigit(p[2])) {

1738 // lint -e{734} Loss of precision

1739 *d++ = static_cast<WCHAR>(

1740 HexDigitToInt(p[1]) * 16 + HexDigitToInt(p[2]));

1741 p += 3;

1742 } else {

1743 p++;

1744 }

1745 break;

1746 case '_': // According to rfc2047, _'s are to be treated as spaces

1747 *d++ = ' '; p++;

1748 break;

1749 default:

1750 d++ = p++;

1751 break;

1752 }

1753 }

1754 return (d-dest);

1755 }

1756

1757 // TODO(omaha): currently set not to use IsCharUpper because that is relatively slow

1758 // this is used in the QUIB; consider if we need to use IsCharUpper or a replace ment

1759 bool String_IsUpper(TCHAR c) {

1760 return (c >= 'A' && c <= 'Z');

1761 // return (IsCharUpper (c));

1762 }

1763

1764 // Replacement for the CRT toupper(c)

1765 int String_ToUpper(int c) {

1766 // If it's < 128, then convert is ourself, which is far cheaper than the syste m conversion

1767 if (c < 128)

1768 return String_ToUpperA(static_cast<char>(c));

1769

1770 TCHAR * p_c = reinterpret_cast<TCHAR *>(c);

1771 int conv_c = reinterpret_cast<int>(::CharUpper(p_c));

1772 return conv_c;

1773 }

1774

// Replacement for the CRT toupper(c): maps 'a'..'z' to 'A'..'Z' and returns
// every other character unchanged.
char String_ToUpperA(char c) {
  const bool is_lower = !(c < 'a' || c > 'z');
  return is_lower ? static_cast<char>(c - ('a' - 'A')) : c;
}

1780

1781 void String_ToLower(TCHAR* str) {

1782 ASSERT1(str);

1783 ::CharLower(str);

1784 }

1785

1786 void String_ToUpper(TCHAR* str) {

1787 ASSERT1(str);

1788 ::CharUpper(str);

1789 }

1790

1791 // String comparison based on length

1792 // Replacement for the CRT strncmp(i)

1793 int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool igno re_case) {

1794 ASSERT(str2, (L""));

1795 ASSERT(str1, (L""));

1796

1797 TCHAR c1, c2;

1798

1799 if (len == 0)

1800 return 0;

1801

1802 // compare each char

1803 // TODO(omaha): If we use a lot of case sensitive compares consider having 2 l oops.

1804 do {

1805 c1 = *str1++;

1806 c2 = *str2++;

1807 if (ignore_case) {

1808 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun cation

1809 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507

1810 }

1811 } while ( (--len) && c1 && (c1 == c2) );

1812

1813 return (int)(c1 - c2);

1814 }

1815

1816 // TODO(omaha): Why do we introduce this behaviorial difference?

1817 // Replacement for strncpy() - except ALWAYS ends string with null

1818 TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len) {

1819 ASSERT (source, (L""));

1820 ASSERT (destination, (L""));

1821

1822 TCHAR* result = destination;

1823

1824 ASSERT (0 != len, (L"")); // Too short a destination for even the null cha racter

1825

1826 while (*source && len) {

1827 destination++ = source++;

1828 len--;

1829 }

1830

1831 // If we ran out of space, back up one

1832 if (0 == len) {

1833 destination--;

1834 }

1835

1836 // Null-terminate the string

1837 *destination = _T('\0');

1838

1839 return result;

1840 }

1841

1842 // check if a string starts with another string

1843 bool String_StartsWith(const TCHAR str, const TCHAR start_str,

1844 bool ignore_case) {

1845 ASSERT(start_str, (L""));

1846 ASSERT(str, (L""));

1847

1848 while (0 != *str) {

1849 // Check for matching characters

1850 TCHAR c1 = *str;

1851 TCHAR c2 = *start_str;

1852

1853 // Reached the end of start_str?

1854 if (0 == c2)

1855 return true;

1856

1857 if (ignore_case) {

1858 c1 = (TCHAR)String_ToLowerChar((int)(c1)); // lint !e507 Suspicious trun cation

1859 c2 = (TCHAR)String_ToLowerChar((int)(c2)); // lint !e507 Suspicious trun cation

1860 }

1861

1862 if (c1 != c2)

1863 return false;

1864

1865 ++str;

1866 ++start_str;

1867 }

1868

1869 // If str is shorter than start_str, no match. If equal size, match.

1870 return 0 == *start_str;

1871 }

1872

1873 // check if a string starts with another string

1874 bool String_StartsWithA(const char str, const char start_str, bool ignore_case ) {

1875 ASSERT(start_str, (L""));

1876 ASSERT(str, (L""));

1877

1878 while (0 != *str) {

1879 // Check for matching characters

1880 char c1 = *str;

1881 char c2 = *start_str;

1882

1883 // Reached the end of start_str?

1884 if (0 == c2)

1885 return true;

1886

1887 if (ignore_case) {

1888 c1 = String_ToLowerCharAnsi(c1);

1889 c2 = String_ToLowerCharAnsi(c2);

1890 }

1891

1892 if (c1 != c2)

1893 return false;

1894

1895 ++str;

1896 ++start_str;

1897 }

1898

1899 // If str is shorter than start_str, no match. If equal size, match.

1900 return 0 == *start_str;

1901 }

1902

1903 // the wrapper version below actually increased code size as of 5/31/04

1904 // perhaps because the int64 version is larger and in some EXE/DLLs we only need the int32 version

1905

1906 // converts a string to an int

1907 // Does not check for overflow

1908 // is the direct int32 version significantly faster for our usage?

1909 // int32 String_StringToInt(const TCHAR * str) {

1910 // ASSERT(str, (L""));

1911 // return static_cast<int32>(String_StringToInt64 (str));

1912 // }

1913

1914 // converts a string to an int

1915 // Does not check for overflow

1916 int32 String_StringToInt(const TCHAR * str) {

1917 ASSERT(str, (L""));

1918

1919 int c; // current char

1920 int32 total; // current total

1921 int sign; // if '-', then negative, otherwise positive

1922

1923 // remove spaces

1924 while ( *str == _T(' '))

1925 ++str;

1926

1927 c = (int)*str++;

1928 sign = c; // save sign indication

1929 if (c == _T('-') \|\| c == _T('+'))

1930 c = (int)*str++; // skip sign

1931

1932 total = 0;

1933

1934 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1 ) {

1935 total = 10 * total + c; // accumulate digit

1936 c = *str++; // get next char

1937 }

1938

1939 if (sign == '-')

1940 return -total;

1941 else

1942 return total; // return result, negated if necessary

1943 }

1944

1945 // converts a string to an int64

1946 // Does not check for overflow

1947 int64 String_StringToInt64(const TCHAR * str) {

1948 ASSERT(str, (L""));

1949

1950 int c; // current char

1951 int64 total; // current total

1952 int sign;

1953

1954 while (*str == ' ') ++str; // skip space

1955

1956 c = (int)*str++;

1957 sign = c; /* save sign indication */

1958 if (c == '-' \|\| c == '+')

1959 c = (int)*str++;

1960

1961 total = 0;

1962

1963 while ((c = String_CharToDigit(static_cast<TCHAR>(c))) != -1) {

1964 total = 10 * total + c; /* accumulate digit */

1965 c = str++; / get next char */

1966 }

1967

1968 if (sign == '-')

1969 return -total;

1970 else

1971 return total;

1972 }

1973

1974 // A faster version of the ::CharLower command. We first check if all characters are in low ANSI

1975 // If so, we can convert it ourselves [which is about 10x faster]

1976 // Otherwise, ask the system to do it for us.

1977 TCHAR * String_FastToLower(TCHAR * str) {

1978 ASSERT(str, (L""));

1979

1980 TCHAR * p = str;

1981 while (*p) {

1982 // If we can't process it ourselves, then do it with the API

1983 if (*p > 127)

1984 return ::CharLower(str);

1985 ++p;

1986 }

1987

1988 // If we're still here, do it ourselves

1989 p = str;

1990 while (*p) {

1991 // Lower case it

1992 if (p >= L'A' && p <= 'Z')

1993 *p \|= 0x20;

1994 ++p;

1995 }

1996

1997 return str;

1998 }

1999

2000 // Convert a size_t to a CString

2001 CString sizet_to_str(const size_t & i) {

2002 CString out;

2003 out.Format(NOTRANSL(_T("%u")),i);

2004 return out;

2005 }

2006

2007 // Convert an int to a CString

2008 CString itostr(const int i) {

2009 return String_Int64ToString(i, 10);

2010 }

2011

2012 // Convert a uint to a CString

2013 CString itostr(const uint32 i) {

2014 return String_Int64ToString(i, 10);

2015 }

2016

2017 // converts an int to a string

2018 // Does not check for overflow

2019 CString String_Int64ToString(int64 value, int radix) {

2020 ASSERT(radix > 0, (L""));

2021

2022 // Space big enough for it in binary, plus the sign

2023 TCHAR temp[66];

2024

2025 bool negative = false;

2026 if (value < 0) {

2027 negative = true;

2028 value = -value;

2029 }

2030

2031 int pos = 0;

2032

2033 // Add digits in reverse order

2034 do {

2035 TCHAR digit = (TCHAR) (value % radix);

2036 if (digit > 9)

2037 temp[pos] = L'a' + digit - 10;

2038 else

2039 temp[pos] = L'0' + digit;

2040

2041 pos++;

2042 value /= radix;

2043 } while (value > 0);

2044

2045 if (negative)

2046 temp[pos++] = L'-';

2047

2048 // Reverse it before making a CString out of it

2049 int start = 0, end = pos - 1;

2050 while (start < end) {

2051 TCHAR t = temp[start];

2052 temp[start] = temp[end];

2053 temp[end] = t;

2054

2055 end--;

2056 start++;

2057 }

2058

2059 return CString(temp, pos);

2060 }

2061

2062 // converts an uint64 to a string

2063 // Does not check for overflow

2064 CString String_Uint64ToString(uint64 value, int radix) {

2065 ASSERT1(radix > 0);

2066

2067 CString ret;

2068

2069 const uint32 kMaxUint64Digits = 65;

2070

2071 // Space big enough for it in binary

2072 TCHAR* temp = ret.GetBufferSetLength(kMaxUint64Digits);

2073

2074 int pos = 0;

2075

2076 // Add digits in reverse order

2077 do {

2078 TCHAR digit = static_cast<TCHAR>(value % radix);

2079 if (digit > 9) {

2080 temp[pos] = _T('a') + digit - 10;

2081 } else {

2082 temp[pos] = _T('0') + digit;

2083 }

2084

2085 pos++;

2086 value /= radix;

2087 } while (value > 0 && pos < kMaxUint64Digits);

2088

2089 ret.ReleaseBuffer(pos);

2090

2091 // Reverse it before making a CString out of it

2092 ret.MakeReverse();

2093

2094 return ret;

2095 }

2096

2097 // converts an double to a string specifies the number of digits after

2098 // the decimal point

2099 CString String_DoubleToString(double value, int point_digits) {

2100 int64 int_val = (int64) value;

2101

2102 // Deal with integer part

2103 CString result(String_Int64ToString(int_val, 10));

2104

2105 if (point_digits > 0) {

2106 result.AppendChar(L'.');

2107

2108 // get the fp digits

2109 double rem_val = value - int_val;

2110 if (rem_val < 0)

2111 rem_val = -rem_val;

2112

2113 // multiply w/ the requested number of significant digits

2114 // construct the string in place

2115 for(int i=0; i<point_digits; i++) {

2116 // TODO(omaha): I have seen 1.2 turn into 1.1999999999999, and generate th at string.

2117 // We should round better. For now, I'll add a quick fix to favor high

2118 rem_val += 1e-12;

2119 rem_val *= 10;

2120 // Get the ones digit

2121 int64 int_rem_dig = std::min(10LL, static_cast<int64>(rem_val));

2122 result += static_cast<TCHAR>(int_rem_dig + L'0');

2123 rem_val = rem_val - int_rem_dig;

2124 }

2125 }

2126

2127 return result;

2128 }

2129

2130 double String_StringToDouble (const TCHAR *s) {

2131 ASSERT(s, (L""));

2132

2133 double value, power;

2134 int i = 0, sign;

2135

2136 while (IsSpaceW(s[i])) i++;

2137

2138 // get sign

2139 sign = (s[i] == '-') ? -1 : 1;

2140 if (s[i] == '+' \|\| s[i] == '-') i++;

2141

2142 for (value = 0.0; s[i] >= '0' && s[i] <= '9'; i++)

2143 value = 10.0 * value + (s[i] - '0');

2144

2145 if (s[i] == '.') i++;

2146

2147 for (power = 1.0; s[i] >= '0' && s[i] <= '9'; i++) {

2148 value = 10.0 * value + (s[i] - '0');

2149 power *= 10.0;

2150 }

2151

2152 return sign * value / power;

2153 }

2154

2155 // Converts a character to a digit

2156 // if the character is not a digit return -1 (same as CRT)

2157 int32 String_CharToDigit(const TCHAR c) {

2158 return ((c) >= '0' && (c) <= '9' ? (c) - '0' : -1);

2159 }

2160

2161 bool String_IsDigit (const TCHAR c) {

2162 return ((c) >= '0' && (c) <= '9');

2163 }

2164

2165 TCHAR String_DigitToChar(unsigned int n) {

2166 ASSERT1(n < 10);

2167 return static_cast<TCHAR>(_T('0') + n % 10);

2168 }

2169

2170 // Returns true if an identifier character: letter, digit, or "_"

2171 bool String_IsIdentifierChar(const TCHAR c) {

2172 return ((c >= _T('A') && c <= _T('Z')) \|\|

2173 (c >= _T('a') && c <= _T('z')) \|\|

2174 (c >= _T('0') && c <= _T('9')) \|\|

2175 c == _T('_'));

2176 }

2177

2178 // Returns true if the string has letters in it.

2179 // This is used by the keyword extractor to downweight numbers,

2180 // IDs (sequences of numbers like social security numbers), etc.

2181 bool String_HasAlphabetLetters (const TCHAR * str) {

2182 ASSERT (str, (L""));

2183

2184 while (*str != '\0') {

2185 // if (iswalpha (*str)) {

2186 // Note that IsCharAlpha is slower but we want to avoid the CRT

2187 if (IsCharAlpha (*str)) {

2188 return true;

2189 }

2190 ++str;

2191 }

2192

2193 return false;

2194 }

2195

2196 CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* pow er) {

2197 uint32 to_one_decimal;

2198

2199 uint32 gig = base_ten ? 1000000000 : (1<<30);

2200 uint32 gig_div_10 = base_ten ? 100000000 : (1<<30)/10;

2201 uint32 meg = base_ten ? 1000000 : (1<<20);

2202 uint32 meg_div_10 = base_ten ? 100000 : (1<<20)/10;

2203 uint32 kilo = base_ten ? 1000 : (1<<10);

2204 uint32 kilo_div_10 = base_ten ? 100 : (1<<10)/10;

2205

2206 if (value >= gig) {

2207 if (power) *power = 3;

2208 to_one_decimal = static_cast<uint32>(value / gig_div_10);

2209 } else if (value >= meg) {

2210 if (power) *power = 2;

2211 to_one_decimal = static_cast<uint32>(value / meg_div_10);

2212 } else if (value >= kilo) {

2213 if (power) *power = 1;

2214 to_one_decimal = static_cast<uint32>(value / kilo_div_10);

2215 } else {

2216 if (power) *power = 0;

2217 return String_Int64ToString(static_cast<uint32>(value), 10 /radix/);

2218 }

2219

2220 uint32 whole_part = to_one_decimal / 10;

2221

2222 if (whole_part < 10)

2223 return Show(0.1 * static_cast<double>(to_one_decimal), 1);

2224

2225 return String_Int64ToString(whole_part, 10 /radix/);

2226 }

2227

2228 int String_FindString(const TCHAR s1, const TCHAR s2) {

2229 ASSERT(s2, (L""));

2230 ASSERT(s1, (L""));

2231

2232 // Naive implementation, but still oodles better than ATL's implementation

2233 // (which deals with variable character widths---we don't).

2234

2235 const TCHAR *found = _tcsstr(s1, s2);

2236 if (NULL == found)

2237 return -1;

2238

2239 return found - s1;

2240 }

2241

2242 int String_FindString(const TCHAR s1, const TCHAR s2, int start_pos) {

2243 ASSERT(s2, (L""));

2244 ASSERT(s1, (L""));

2245

2246 // Naive implementation, but still oodles better than ATL's implementation

2247 // (which deals with variable character widths---we don't).

2248

2249 int skip = start_pos;

2250

2251 const TCHAR *s = s1;

2252 while (skip && *s) {

2253 ++s;

2254 --skip;

2255 }

2256 if (!(*s))

2257 return -1;

2258

2259 const TCHAR *found = _tcsstr(s, s2);

2260 if (NULL == found)

2261 return -1;

2262

2263 return found - s1;

2264 }

2265

2266 int String_FindChar(const TCHAR *str, const TCHAR c) {

2267 ASSERT (str, (L""));

2268 const TCHAR *s = str;

2269 while (*s) {

2270 if (*s == c)

2271 return s - str;

2272 ++s;

2273 }

2274

2275 return -1;

2276 }

2277

2278 // taken from wcsrchr, modified to behave in the CString way

2279 int String_ReverseFindChar(const TCHAR * str,TCHAR c) {

2280 ASSERT (str, (L""));

2281 TCHAR start = (TCHAR )str;

2282

2283 while (str++) / find end of string */

2284 ;

2285 /* search towards front */

2286 while (--str != start && *str != (TCHAR)c)

2287 ;

2288

2289 if (str == (TCHAR)c) / found ? */

2290 return( str - start );

2291

2292 return -1;

2293 }

2294

2295 int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos) {

2296 ASSERT (str, (L""));

2297 int n = 0;

2298 const TCHAR *s = str;

2299 while (*s) {

2300 if (n++ >= start_pos && *s == c)

2301 return s - str;

2302 ++s;

2303 }

2304

2305 return -1;

2306 }

2307

2308 bool String_Contains(const TCHAR s1, const TCHAR s2) {

2309 ASSERT(s2, (L""));

2310 ASSERT(s1, (L""));

2311

2312 return -1 != String_FindString(s1, s2);

2313 }

2314

2315 void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char) {

2316 ASSERT (str, (L""));

2317 while (*str) {

2318 if (*str == old_char)

2319 *str = new_char;

2320

2321 ++str;

2322 }

2323 }

2324

2325 void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char) {

2326 String_ReplaceChar (str.GetBuffer(), old_char, new_char);

2327 str.ReleaseBuffer();

2328 }

2329

2330 int ReplaceCString (CString & src, const TCHAR from, const TCHAR to) {

2331 ASSERT(to, (L""));

2332 ASSERT(from, (L""));

2333

2334 return ReplaceCString(src, from, lstrlen(from), to, lstrlen(to), kRepMax);

2335 }

2336

2337 // A special version of the replace function which takes advantage of CString pr operties

2338 // to make it much faster when the string grows

2339 // 1) It will resize the string in place if possible. Even if it has to 'grow' t he string

2340 // 2) It will cutoff after a maximum number of matches

2341 // 3) It expects sizing data to be passed to it

2342 int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len,

2343 const TCHAR *to, unsigned int to_len,

2344 unsigned int max_matches) {

2345 ASSERT (from, (L""));

2346 ASSERT (to, (L""));

2347 ASSERT (from[0] != '\0', (L""));

2348 int i = 0, j = 0;

2349 unsigned int matches = 0;

2350

2351 // Keep track of the matches, it's easier than recalculating them

2352 unsigned int match_pos_stack[kExpectedMaxReplaceMatches];

2353

2354 // We might need to dynamically allocate space for the matches

2355 bool dynamic_allocate = false;

2356 unsigned int * match_pos = (unsigned int*)match_pos_stack;

2357 unsigned int max_match_size = kExpectedMaxReplaceMatches;

2358

2359 // Is the string getting bigger?

2360 bool longer = to_len > from_len;

2361

2362 // don't compute the lengths unless we know we need to

2363 int src_len = src.GetLength();

2364 int cur_len = src_len;

2365

2366 // Trick: We temporarily add 1 extra character to the string. The first char f rom the from

2367 // string. This way we can avoid searching for NULL, since we are guaranteed t o find it

2368 TCHAR * buffer = src.GetBufferSetLength(src_len+1);

2369 const TCHAR from_0 = from[0];

2370 buffer[src_len] = from[0];

2371

2372 while (i < cur_len) {

2373 // If we have too many matches, then re-allocate to a dynamic buffer that is

2374 // twice as big as the one we are currently using

2375 if (longer && (matches == max_match_size)) {

2376 // Double the buffer size, and copy it over

2377 unsigned int * temp = new unsigned int[max_match_size * 2];

2378 memcpy(temp, match_pos, matches * sizeof(unsigned int));

2379 if (dynamic_allocate)

2380 delete [] match_pos; // lint !e424 Inappropriate deallocation

2381 match_pos = temp;

2382

2383 max_match_size *= 2;

2384 dynamic_allocate = true;

2385 }

2386

2387 // If we have the maximum number of matches already, then stop

2388 if (matches >= max_matches) {

2389 break;

2390 }

2391

2392 // For each potential match

2393 // Note: oddly enough, this is the most expensive line in the function under normal usage. So I am optimizing the heck out of it

2394 TCHAR * buf_ptr = buffer + i;

2395 while (*buf_ptr != from_0) { ++buf_ptr; }

2396 i = buf_ptr - buffer;

2397

2398 // We're done!

2399 if (i >= cur_len)

2400 break;

2401

2402 // buffer is not NULL terminated, we replaced the NULL above

2403 while (i < cur_len && buffer[i] && buffer[i] == from[j]) {

2404 ++i; ++j;

2405 if (from[j] == '\0') { // found match

2406

2407 if (!longer) { // modify in place

2408

2409 memcpy ((byte )(buffer+i) - (sizeof (TCHAR) from_len), (byte )to, sizeof (TCHAR) to_len);

2410 // if there are often a lot of replacements, it would be faster to cre ate a new string instead

2411 // of using memmove

2412

2413 // TODO(omaha): - memmove will cause n^2 behavior in strings with mult iple matches since it will be moved many times...

2414 if (to_len < from_len) { memmove ((byte )(buffer+i) - (sizeof (TCHAR) (from_len - to_len)),

2415 (byte )(buffer+i), (src_len - i + 1) sizeof (TCHAR)); }

2416

2417 i -= (from_len - to_len);

2418 cur_len -= (from_len - to_len);

2419 }

2420 else

2421 match_pos[matches] = i - from_len;

2422

2423 ++matches;

2424

2425 break;

2426 }

2427 }

2428

2429 j = 0;

2430 }

2431

2432 if (to_len <= from_len)

2433 src_len -= matches * (from_len - to_len);

2434

2435 // if the new string is longer we do another pass now that we know how long th e new string needs to be

2436 if (matches && to_len > from_len) {

2437 src.ReleaseBuffer(src_len);

2438

2439 int new_len = src_len + matches * (to_len - from_len);

2440 buffer = src.GetBufferSetLength(new_len);

2441

2442 // It's easier to assemble it backwards...

2443 int temp_end = new_len;

2444 for(i = matches-1; i >= 0; --i) {

2445 // Figure out where the trailing portion isthe trailing portion

2446 int len = src_len - match_pos[i] - from_len;

2447 int start = match_pos[i] + from_len;

2448 int dest = temp_end - len;

2449 memmove(buffer+dest, buffer+start, (len) * sizeof(TCHAR));

2450

2451 // copy the new item

2452 memcpy(buffer + dest - to_len, to, to_len * sizeof(TCHAR));

2453

2454 // Update the pointers

2455 temp_end = dest - to_len;

2456 src_len = match_pos[i];

2457

2458 }

2459 src_len = new_len;

2460 }

2461

2462 src.ReleaseBuffer(src_len);

2463 if (dynamic_allocate)

2464 delete [] match_pos; // lint !e673 Possibly inappropriate deallocation

2465

2466 return matches;

2467 }

2468

2469 /*

2470 The following 2 functions do replacement on TCHAR* directly. They are currently unused.

2471 Feel free to put it back if you need to.

2472 */

2473 int ReplaceString (TCHAR src, const TCHAR from, const TCHAR to, TCHAR out, int out_len) {

2474 ASSERT(out_len, (L""));

2475 ASSERT(out, (L""));

2476 ASSERT(to, (L""));

2477 ASSERT(from, (L""));

2478 ASSERT(src, (L""));

2479

2480 bool created_new_string;

2481 int matches = ReplaceStringMaybeInPlace (src, from, to, out, out_len, &created _new_string);

2482 if (!created_new_string) {

2483 out = new TCHAR [(out_len)+1];

2484 if (!(out)) { out = src; return 0; }

2485 _tcscpy_s(out, out_len + 1, src);

2486 }

2487

2488 return matches;

2489 }

2490

2491 int ReplaceStringMaybeInPlace (TCHAR src, const TCHAR from, const TCHAR to, T CHAR out, int out_len, bool *created_new_string) {

2492 ASSERT (created_new_string, (L""));

2493 ASSERT (out_len, (L""));

2494 ASSERT (src, (L""));

2495 ASSERT (from, (L""));

2496 ASSERT (to, (L""));

2497 ASSERT (out, (L""));

2498 ASSERT (from[0] != '\0', (L""));

2499 int i = 0, j = 0;

2500 int matches = 0;

2501

2502 // don't compute the lengths unless we know we need to

2503 int from_len = -1, to_len = -1, src_len = -1;

2504

2505 *created_new_string = false;

2506 *out = src;

2507

2508 while (src[i]) {

2509 while (src[i] && src[i] != from[0]) { i++; }

2510 while (src[i] && src[i] == from[j]) {

2511 i++; j++;

2512 if (from[j] == '\0') { // found match

2513 if (from_len == -1) { // compute lengths if not known

2514 from_len = lstrlen (from);

2515 to_len = lstrlen (to);

2516 src_len = lstrlen (src);

2517 }

2518

2519 matches++;

2520

2521 if (to_len <= from_len) { // modify in place

2522 memcpy ((byte )(src+i) - (sizeof (TCHAR) from_len), (byte )to, siz eof (TCHAR) to_len);

2523 // if there are often a lot of replacements, it would be faster to cre ate a new string instead

2524 // of using memmove

2525 if (to_len < from_len) { memmove ((byte )(src+i) - (sizeof (TCHAR) (from_len - to_len)),

2526 (byte )(src+i), (src_len - i + 1) sizeof (TCHAR)); }

2527 i -= (from_len - to_len);

2528 }

2529

2530 break;

2531 }

2532 }

2533

2534 j = 0;

2535 }

2536

2537 *out_len = i;

2538

2539 // if the new string is longer we do another pass now that we know how long th e new string needs to be

2540 if (matches && to_len > from_len) {

2541 ASSERT (src_len == i, (L""));

2542 int new_len = src_len + matches * (to_len - from_len);

2543 *out = new TCHAR [new_len+1];

2544 if (!(out)) { out = src; *out_len = lstrlen (src); return 0; }

2545 *created_new_string = true;

2546 i = 0; j = 0; int k = 0;

2547

2548 while (src[i]) {

2549 while (src[i] && src[i] != from[0]) {

2550 (*out)[k++] = src[i++];

2551 }

2552 while (src[i] && src[i] == from[j]) {

2553 (*out)[k++] = src[i++];

2554 j++;

2555

2556 if (from[j] == '\0') { // found match

2557 k -= from_len;

2558 ASSERT (k >= 0, (L""));

2559 memcpy ((byte )((out)+k), (byte )to, sizeof (TCHAR) to_le n);

2560 k += to_len;

2561 break;

2562 }

2563 }

2564

2565 j = 0;

2566 }

2567

2568 (*out)[k] = '\0';

2569 ASSERT (k == new_len, (L""));

2570 *out_len = new_len;

2571 }

2572

2573 return matches;

2574 }

2575

2576 /****************************************************************************

2577 * wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed int.

2578 *

2579 * modified from:

2580 *

2581 * wcstol.c - Contains C runtimes wcstol and wcstoul

2582 *

2583 * Copyright (c) Microsoft Corporation. All rights reserved.

2584 *

2585 * Purpose:

2586 * Convert an ascii string to a long 32-bit value. The base

2587 * used for the calculations is supplied by the caller. The base

2588 * must be in the range 0, 2-36. If a base of 0 is supplied, the

2589 * ascii string must be examined to determine the base of the

2590 * number:

2591 * (a) First char = '0', second char = 'x' or 'X',

2592 * use base 16.

2593 * (b) First char = '0', use base 8

2594 * (c) First char in range '1' - '9', use base 10.

2595 *

2596 * If the 'endptr' value is non-NULL, then wcstol/wcstoul places

2597 * a pointer to the terminating character in this value.

2598 * See ANSI standard for details

2599 *

2600 *Entry:

2601 * nptr == NEAR/FAR pointer to the start of string.

2602 * endptr == NEAR/FAR pointer to the end of the string.

2603 * ibase == integer base to use for the calculations.

2604 *

2605 * string format: [whitespace] [sign] [0] [x] [digits/letters]

2606 *

2607 *Exit:

2608 * Good return:

2609 * result

2610 *

2611 * Overflow return:

2612 * wcstol -- LONG_MAX or LONG_MIN

2613 * wcstoul -- ULONG_MAX

2614 * wcstol/wcstoul -- errno == ERANGE

2615 *

2616 * No digits or bad base return:

2617 * 0

2618 * *endptr = nptr

2619 *

2620 *Exceptions:

2621 * None.

2622 *

2623 *******************************************************************************/

2624

2625 // flag values */

2626 #define kFlUnsigned (1) // wcstoul called */

2627 #define kFlNeg (2) // negative sign found */

2628 #define kFlOverflow (4) // overflow occured */

2629 #define kFlReaddigit (8) // we've read at least one correct digit */

2630

2631 static unsigned long __cdecl wcstoxl (const wchar_t nptr, wchar_t *endptr, int ibase, int flags) {

2632 ASSERT(nptr, (L""));

2633

2634 const wchar_t *p;

2635 wchar_t c;

2636 unsigned long number;

2637 unsigned digval;

2638 unsigned long maxval;

2639 // #ifdef _MT

2640 // pthreadlocinfo ptloci = _getptd()->ptlocinfo;

2641

2642 // if ( ptloci != __ptlocinfo )

2643 // ptloci = __updatetlocinfo();

2644 // #endif // _MT */

2645

2646 p = nptr; // p is our scanning pointer */

2647 number = 0; // start with zero */

2648

2649 c = p++; // read char /

2650

2651 // #ifdef _MT

2652 // while ( __iswspace_mt(ptloci, c) )

2653 // #else // _MT */

2654 while (c == ' ')

2655 // while ( iswspace(c) )

2656 // #endif // _MT */

2657 c = p++; // skip whitespace /

2658

2659 if (c == '-') {

2660 flags \|= kFlNeg; // remember minus sign */

2661 c = *p++;

2662 }

2663 else if (c == '+')

2664 c = p++; // skip sign /

2665

2666 if (ibase < 0 \|\| ibase == 1 \|\| ibase > 36) {

2667 // bad base! */

2668 if (endptr)

2669 // store beginning of string in endptr */

2670 endptr = const_cast<wchar_t >(nptr);

2671 return 0L; // return 0 */

2672 }

2673 else if (ibase == 0) {

2674 // determine base free-lance, based on first two chars of

2675 // string */

2676 if (String_CharToDigit(c) != 0)

2677 ibase = 10;

2678 else if (p == L'x' \|\| p == L'X')

2679 ibase = 16;

2680 else

2681 ibase = 8;

2682 }

2683

2684 if (ibase == 16) {

2685 // we might have 0x in front of number; remove if there */

2686 if (String_CharToDigit(c) == 0 && (p == L'x' \|\| p == L'X')) {

2687 ++p;

2688 c = p++; // advance past prefix /

2689 }

2690 }

2691

2692 // if our number exceeds this, we will overflow on multiply */

2693 maxval = ULONG_MAX / ibase;

2694

2695 for (;;) { // exit in middle of loop */

2696

2697 // convert c to value */

2698 if ( (digval = String_CharToDigit(c)) != (unsigned) -1 )

2699 ;

2700 else if (c >= 'A' && c <= 'F') { digval = c - 'A' + 10; }

2701 else if (c >= 'a' && c <= 'f') { digval = c - 'a' + 10; }

2702 // else if ( __ascii_iswalpha(c))

2703 // digval = __ascii_towupper(c) - L'A' + 10;

2704 else

2705 break;

2706

2707 if (digval >= (unsigned)ibase)

2708 break; // exit loop if bad digit found */

2709

2710 // record the fact we have read one digit */

2711 flags \|= kFlReaddigit;

2712

2713 // we now need to compute number = number * base + digval,

2714 // but we need to know if overflow occured. This requires

2715 // a tricky pre-check. */

2716

2717 if (number < maxval \|\| (number == maxval &&

2718 (unsigned long)digval <= ULONG_MAX % ibase)) {

2719 // we won't overflow, go ahead and multiply */

2720 number = number * ibase + digval;

2721 }

2722 else {

2723 // we would have overflowed -- set the overflow flag */

2724 flags \|= kFlOverflow;

2725 }

2726

2727 c = p++; // read next digit /

2728 }

2729

2730 --p; // point to place that stopped scan */

2731

2732 if (!(flags & kFlReaddigit)) {

2733 // no number there; return 0 and point to beginning of string */

2734 if (endptr)

2735 // store beginning of string in endptr later on */

2736 p = nptr;

2737 number = 0L; // return 0 */

2738 }

2739 // lint -save -e648 -e650 Overflow in -LONG_MIN

2740 #pragma warning(push)

2741 // C4287 : unsigned/negative constant mismatch.

2742 // The offending expression is number > -LONG_MIN. -LONG_MIN overflows and

2743 // technically -LONG_MIN == LONG_MIN == 0x80000000. It should actually

2744 // result in a compiler warning, such as C4307: integral constant overflow.

2745 // Anyway, in the expression (number > -LONG_MIN) the right operand is converted

2746 // to unsigned long, so the expression is actually evaluated as

2747 // number > 0x80000000UL. The code is probably correct but subtle, to say the

2748 // least.

2749 #pragma warning(disable : 4287)

2750 else if ( (flags & kFlOverflow) \|\|

2751 ( !(flags & kFlUnsigned) &&

2752 ( ( (flags & kFlNeg) && (number > -LONG_MIN) ) \|\|

2753 ( !(flags & kFlNeg) && (number > LONG_MAX) ) ) ) )

2754 {

2755 // overflow or signed overflow occurred */

2756 // errno = ERANGE;

2757 if ( flags & kFlUnsigned )

2758 number = ULONG_MAX;

2759 else if ( flags & kFlNeg )

2760 // lint -e{648, 650} Overflow in -LONG_MIN

2761 number = (unsigned long)(-LONG_MIN);

2762 else

2763 number = LONG_MAX;

2764 }

2765 #pragma warning(pop)

2766 // lint -restore

2767

2768 if (endptr != NULL)

2769 // store pointer to char that stopped the scan */

2770 endptr = const_cast<wchar_t >(p);

2771

2772 if (flags & kFlNeg)

2773 // negate result if there was a neg sign */

2774 number = (unsigned long)(-(long)number);

2775

2776 return number; // done. */

2777 }

2778

2779 long __cdecl Wcstol (const wchar_t nptr, wchar_t *endptr, int ibase) {

2780 ASSERT(endptr, (L""));

2781 ASSERT(nptr, (L""));

2782

2783 return (long) wcstoxl(nptr, endptr, ibase, 0);

2784 }

2785

2786 unsigned long __cdecl Wcstoul (const wchar_t nptr, wchar_t *endptr, int ibase) {

2787 // endptr may be NULL

2788 ASSERT(nptr, (L""));

2789

2790 return wcstoxl(nptr, endptr, ibase, kFlUnsigned);

2791 }

2792

2793 // Functions on arrays of strings

2794

2795 // Returns true iff s is in the array strings (case-insensitive compare)

2796 bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s) {

2797 ASSERT(s, (L""));

2798 // strings may be NULL

2799

2800 const int s_length = lstrlen(s);

2801 if (strings == NULL)

2802 return false;

2803 for (; *strings != NULL; strings++) {

2804 if (0 == String_StrNCmp(*strings, s, s_length, true)) {

2805 return true; // Found equal string

2806 }

2807 }

2808 return false;

2809 }

2810

2811 // Returns index of s in the array of strings (or -1 for missing) (case-insensit ive compare)

2812 int String_IndexOf(const TCHAR* const* strings, const TCHAR* s) {

2813 ASSERT(s, (L""));

2814 // strings may be NULL

2815

2816 const int s_length = lstrlen(s);

2817 if (strings == NULL)

2818 return -1;

2819 for (int i = 0; *strings != NULL; i++, strings++) {

2820 if (0 == String_StrNCmp(*strings, s, s_length, true)) {

2821 return i; // Found equal string

2822 }

2823 }

2824 return -1;

2825 }

2826

2827 // The internal format is a int64.

2828 time64 StringToTime(const CString & time) {

2829 return static_cast<time64>(String_StringToInt64(time));

2830 }

2831

2832 // See above comment from StringToTime.

2833 // Just show it as a INT64 for now

2834 // NOTE: this will truncating it to INT64, which may lop off some times in the f uture

2835 CString TimeToString(const time64 & time) {

2836 return String_Int64ToString(static_cast<int64>(time), 10);

2837 }

2838

2839 const TCHAR FindStringASpaceStringB (const TCHAR s, const TCHAR a, const TCHA R b) {

2840 ASSERT(s, (L""));

2841 ASSERT(a, (L""));

2842 ASSERT(b, (L""));

2843

2844 const TCHAR *search_from = s;

2845 const TCHAR *pos;

2846 while (*search_from && (pos = stristrW (search_from, a)) != NULL) {

2847 const TCHAR *start = pos;

2848 pos += lstrlen(a);

2849 search_from = pos;

2850 while (pos == ' ' \|\| pos == '\t') pos++;

2851 if (!String_StrNCmp (pos, b, lstrlen(b), true)) return start;

2852 }

2853

2854 return 0;

2855 }

2856

// Returns true when |c| is an ASCII letter, 'a'..'z' or 'A'..'Z'.
bool IsAlphaA (const char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}

2860

// Returns true when |c| is an ASCII decimal digit, '0'..'9'.
bool IsDigitA (const char c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}

2864

2865 void SafeStrCat (TCHAR dest, const TCHAR src, int dest_buffer_len) {

2866 _tcscat_s(dest, dest_buffer_len, src);

2867 }

2868

2869 // extracts next float in a string

2870 // skips any non-digit characters

2871 // return position after end of float

2872 const TCHAR ExtractNextDouble (const TCHAR s, double *f) {

2873 ASSERT (f, (L""));

2874 ASSERT (s, (L""));

2875

2876 CString num;

2877 while (s && !String_IsDigit (s)) s++;

2878 while (s && (s == '.' \|\| String_IsDigit (s))) { num += s; s++; }

2879 ASSERT (num.GetLength(), (L""));

2880 *f = String_StringToDouble (num);

2881 return s;

2882 }

2883

2884 TCHAR String_PathFindExtension(const TCHAR path) {

2885 ASSERT(path, (L""));

2886

2887 // Documentation says PathFindExtension string must be of max length

2888 // MAX_PATH but a trusted tester hit the ASSERT and we don't really

2889 // need it here, so commented out. We can't address where it is

2890 // called because it's called from ATL code.

2891 // ASSERT(lstrlen(path)<=MAX_PATH, (L""));

2892

2893 // point to terminating NULL

2894 const TCHAR *ret = path + lstrlen(path);

2895 const TCHAR *pos = ret;

2896

2897 while (--pos >= path) {

2898 if (*pos == '.')

2899 return const_cast<TCHAR *>(pos);

2900 }

2901

2902 return const_cast<TCHAR *>(ret);

2903 }

2904

// Lower-cases a single ASCII character: 'A'..'Z' map to 'a'..'z', every
// other value is returned unchanged.
char String_ToLowerCharAnsi(char c) {
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  if (!is_upper)
    return c;
  return static_cast<char>(c + ('a' - 'A'));
}

2909

2910 int String_ToLowerChar(int c) {

2911 // If it's < 128, then convert is ourself, which is far cheaper than the syste m conversion

2912 if (c < 128)

2913 return String_ToLowerCharAnsi(static_cast<char>(c));

2914

2915 return Char_ToLower(static_cast<TCHAR>(c));

2916 }

2917

2918

2919 bool String_PathRemoveFileSpec(TCHAR *path) {

2920 ASSERT (path, (L""));

2921

2922 int len, pos;

2923 len = pos = lstrlen (path);

2924

2925 // You might think that the SHLWAPI API does not change "c:\windows" -> "c:\"

2926 // when c:\windows is a directory, but it does.

2927

2928 // If we don't want to match this weird API we can use the following to check

2929 // for directories:

2930

2931 // Check if we are already a directory.

2932 WIN32_FILE_ATTRIBUTE_DATA attrs;

2933 // Failure (if file does not exist) is OK.

2934 BOOL success = GetFileAttributesEx(path, GetFileExInfoStandard, &attrs);

2935 UTIL_LOG(L4, (_T("[String_PathRemoveFileSpec][path %s][success %d][dir %d]"),

2936 path,

2937 success,

2938 attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY));

2939 if (success && (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {

2940 // Remove trailing backslash, if any.

2941 if (path[pos-1] == '\\')

2942 path[pos-1] = '\0';

2943 return 1;

2944 }

2945

2946 // Find last backslash.

2947 while (pos && path[pos] != '\\') pos--;

2948 if (!pos && path[pos] != '\\') return 0;

2949

2950 ASSERT (pos < len, (L""));

2951

2952 // The documentation says it removes backslash but it doesn't for c:\.

2953 if (!pos \|\| path[pos-1] == ':' \|\| (pos == 1 && path[0] == '\\'))

2954 // Keep the backslash in this case.

2955 path[pos+1] = '\0';

2956 else

2957 path[pos] = '\0';

2958

2959 return 1;

2960 }

2961

2962 void String_EndWithChar(TCHAR *str, TCHAR c) {

2963 ASSERT (str, (L""));

2964 int len = lstrlen(str);

2965 if (len == 0 \|\| str[len - 1] != c) {

2966 str[len] = c;

2967 str[len + 1] = 0;

2968 }

2969 }

2970

2971 bool StartsWithBOM(const TCHAR* string) {

2972 ASSERT(string, (L""));

2973 wchar_t c = string[0];

2974 if (c == 0xFFFE \|\| c == 0xFEFF)

2975 return true;

2976 else

2977 return false;

2978 }

2979

2980 const TCHAR* StringAfterBOM(const TCHAR* string) {

2981 ASSERT(string, (L""));

2982 return &string[StartsWithBOM(string) ? 1 : 0];

2983 }

2984

2985 bool String_StringToDecimalIntChecked(const TCHAR* str, int* value) {

2986 ASSERT1(str);

2987 ASSERT1(value);

2988

2989 if (_set_errno(0)) {

2990 return false;

2991 }

2992

2993 TCHAR* end_ptr = NULL;

2994 *value = _tcstol(str, &end_ptr, 10);

2995 ASSERT1(end_ptr);

2996

2997 if (errno) {

2998 ASSERT1(ERANGE == errno);

2999 // Overflow or underflow.

3000 return false;

3001 } else if (*value == 0) {

3002 // The value returned could be an error code. tcsltol returns

3003 // zero when it cannot convert the string. However we need to

3004 // distinguish a real zero. Thus check to see if end_ptr is not the start

3005 // of the string (str is not an empty string) and is pointing to a '\0'.

3006 // If not, we have an error.

3007 if ((str == end_ptr) \|\| (*end_ptr != '\0')) {

3008 return false;

3009 }

3010 } else if (*end_ptr != '\0') {

3011 // The end_ptr is pointing at a character that is

3012 // not the end of the string. Only part of the string could be converted.

3013 return false;

3014 }

3015

3016 return true;

3017 }

3018

3019 bool CLSIDToCString(const GUID& guid, CString* str) {

3020 ASSERT(str, (L""));

3021

3022 LPOLESTR string_guid = NULL;

3023 if (::StringFromCLSID(guid, &string_guid) != S_OK) {

3024 return false;

3025 }

3026 *str = string_guid;

3027 ::CoTaskMemFree(string_guid);

3028

3029 return true;

3030 }

3031

3032 HRESULT String_StringToBool(const TCHAR* str, bool* value) {

3033 ASSERT1(str);

3034 ASSERT1(value);

3035

3036 // This method now performs a case-insentitive

3037 // culture aware compare. We should however be ok as we are only comparing

3038 // latin characters.

3039 if (_tcsicmp(kFalse, str) == 0) {

3040 *value = false;

3041 } else if (_tcsicmp(kTrue, str) == 0) {

3042 *value = true;

3043 } else {

3044 // we found another string. should error out.

3045 return E_FAIL;

3046 }

3047 return S_OK;

3048 }

3049

3050 HRESULT String_BoolToString(bool value, CString* string) {

3051 ASSERT1(string);

3052 *string = value ? kTrue : kFalse;

3053 return S_OK;

3054 }

3055

3056 CString String_ReplaceIgnoreCase(const CString& string,

3057 const CString& token,

3058 const CString& replacement) {

3059 int token_length = token.GetLength();

3060 if (!token_length) {

3061 return string;

3062 }

3063

3064 CString string_lowercase(string);

3065 CString token_lowercase(token);

3066 string_lowercase.MakeLower();

3067 token_lowercase.MakeLower();

3068

3069 CString output(string);

3070 int replacement_length = replacement.GetLength();

3071

3072 int index = 0;

3073 int output_index = 0;

3074

3075 for (int new_index = 0;

3076 (new_index = string_lowercase.Find(token_lowercase, index)) != -1;

3077 index = new_index + token_length) {

3078 output_index += new_index - index;

3079 output.Delete(output_index, token_length);

3080 output.Insert(output_index, replacement);

3081 output_index += replacement_length;

3082 }

3083

3084 return output;

3085 }

3086

3087 // Escape and unescape strings (shlwapi-based implementation).

3088 // The intended usage for these APIs is escaping strings to make up

3089 // URLs, for example building query strings.

3090 //

3091 // Pass false to the flag segment_only to escape the url. This will not

3092 // cause the conversion of the # (%23), ? (%3F), and / (%2F) characters.

3093

3094 // Characters that must be encoded include any characters that have no

3095 // corresponding graphic character in the US-ASCII coded character

3096 // set (hexadecimal 80-FF, which are not used in the US-ASCII coded character

3097 // set, and hexadecimal 00-1F and 7F, which are control characters),

3098 // blank spaces, "%" (which is used to encode other characters),

3099 // and unsafe characters (<, >, ", #, {, }, \|, \, ^, ~, [, ], and ').

3100 //

3101 // The input and output strings can't be longer than INTERNET_MAX_URL_LENGTH

3102

3103 HRESULT StringEscape(const CString& str_in,

3104 bool segment_only,

3105 CString* str_out) {

3106 ASSERT1(str_out);

3107 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);

3108

3109 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;

3110 HRESULT hr = ::UrlEscape(str_in, str_out->GetBufferSetLength(buf_len), &buf_le n,

3111 segment_only ? URL_ESCAPE_PERCENT \| URL_ESCAPE_SEGMENT_ONLY : URL_ESCAPE_PER CENT);

3112 if (SUCCEEDED(hr)) {

3113 str_out->ReleaseBuffer();

3114 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);

3115 }

3116 return hr;

3117 }

3118

3119 HRESULT StringUnescape(const CString& str_in, CString* str_out) {

3120 ASSERT1(str_out);

3121 ASSERT1(str_in.GetLength() < INTERNET_MAX_URL_LENGTH);

3122

3123 DWORD buf_len = INTERNET_MAX_URL_LENGTH + 1;

3124 HRESULT hr = ::UrlUnescape(const_cast<TCHAR*>(str_in.GetString()),

3125 str_out->GetBufferSetLength(buf_len), &buf_len, 0);

3126 if (SUCCEEDED(hr)) {

3127 str_out->ReleaseBuffer(buf_len + 1);

3128 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);

3129 }

3130 return hr;

3131 }

3132

3133 bool String_StringToTristate(const TCHAR* str, Tristate* value) {

3134 ASSERT1(str);

3135 ASSERT1(value);

3136

3137 int numerical_value = 0;

3138 if (!String_StringToDecimalIntChecked(str, &numerical_value)) {

3139 return false;

3140 }

3141

3142 switch (numerical_value) {

3143 case 0:

3144 *value = TRISTATE_FALSE;

3145 break;

3146 case 1:

3147 *value = TRISTATE_TRUE;

3148 break;

3149 case 2:

3150 *value = TRISTATE_NONE;

3151 break;

3152 default:

3153 return false;

3154 }

3155

3156 return true;

3157 }

3158

3159 // Extracts the name and value from a string that contains a name/value pair.

3160 bool ParseNameValuePair(const CString& token,

3161 TCHAR separator,

3162 CString* name,

3163 CString* value) {

3164 ASSERT1(name);

3165 ASSERT1(value);

3166

3167 int separator_index = token.Find(separator);

3168 if ((separator_index == -1) \|\| // Not a name-value pair.

3169 (separator_index == 0) \|\| // No name was supplied.

3170 (separator_index == (token.GetLength() - 1))) { // No value was supplied.

3171 return false;

3172 }

3173

3174 *name = token.Left(separator_index);

3175 *value = token.Right(token.GetLength() - separator_index - 1);

3176

3177 ASSERT1(token.GetLength() == name->GetLength() + value->GetLength() + 1);

3178

3179 // It's not possible for the name to contain the separator.

3180 ASSERT1(-1 == name->Find(separator));

3181 if (-1 != value->Find(separator)) {

3182 // The value contains the separator.

3183 return false;

3184 }

3185

3186 return true;

3187 }

3188

3189 bool SplitCommandLineInPlace(TCHAR *command_line,

3190 TCHAR **first_argument_parameter,

3191 TCHAR **remaining_arguments_parameter) {

3192 if (!command_line \|\|

3193 !first_argument_parameter \|\|

3194 !remaining_arguments_parameter) {

3195 return false;

3196 }

3197

3198 TCHAR end_char;

3199 TCHAR &first_argument = first_argument_parameter;

3200 TCHAR &remaining_arguments = remaining_arguments_parameter;

3201 if (_T('\"') == *command_line) {

3202 end_char = _T('\"');

3203 first_argument = remaining_arguments = command_line + 1;

3204 } else {

3205 end_char = _T(' ');

3206 first_argument = remaining_arguments = command_line;

3207 }

3208 // Search for the end of the first argument

3209 while (end_char != remaining_arguments && '\0' != remaining_arguments) {

3210 ++remaining_arguments;

3211 }

3212 if (end_char == *remaining_arguments) {

3213 *remaining_arguments = '\0';

3214 do {

3215 // Skip the spaces between the first argument and the remaining arguments.

3216 ++remaining_arguments;

3217 } while (_T(' ') == *remaining_arguments);

3218 }

3219 return true;

3220 }

3221

3222 bool ContainsOnlyAsciiChars(const CString& str) {

3223 for (int i = 0; i < str.GetLength(); ++i) {

3224 if (str[i] > 0x7F) {

3225 return false;

3226 }

3227 }

3228 return true;

3229 }

3230 CString BytesToHex(const uint8* bytes, size_t num_bytes) {

3231 CString result;

3232 if (bytes) {

3233 result.Preallocate(num_bytes * sizeof(TCHAR));

3234 static const TCHAR* const kHexChars = _T("0123456789abcdef");

3235 for (size_t i = 0; i != num_bytes; ++i) {

3236 result.AppendChar(kHexChars[(bytes[i] >> 4)]);

3237 result.AppendChar(kHexChars[(bytes[i] & 0xf)]);

3238 }

3239 }

3240 return result;

3241 }

3242

3243 CString BytesToHex(const std::vector<uint8>& bytes) {

3244 CString result;

3245 if (!bytes.empty()) {

3246 result.SetString(BytesToHex(&bytes.front(), bytes.size()));

3247 }

3248 return result;

3249 }

3250

3251 void JoinStrings(const std::vector<CString>& components,

3252 const TCHAR* delim,

3253 CString* result) {

3254 ASSERT1(result);

3255 result->Empty();

3256

3257 // Compute length so we can reserve memory.

3258 size_t length = 0;

3259 size_t delim_length = delim ? _tcslen(delim) : 0;

3260 for (size_t i = 0; i != components.size(); ++i) {

3261 if (i != 0) {

3262 length += delim_length;

3263 }

3264 length += components[i].GetLength();

3265 }

3266

3267 result->Preallocate(length);

3268

3269 for (size_t i = 0; i != components.size(); ++i) {

3270 if (i != 0 && delim) {

3271 result->Append(delim, delim_length);

3272 }

3273 result->Append(components[i]);

3274 }

3275 }

3276

3277 void JoinStringsInArray(const TCHAR* components[],

3278 int num_components,

3279 const TCHAR* delim,

3280 CString* result) {

3281 ASSERT1(result);

3282 result->Empty();

3283

3284 for (int i = 0; i != num_components; ++i) {

3285 if (i != 0 && delim) {

3286 result->Append(delim);

3287 }

3288 if (components[i]) {

3289 result->Append(components[i]);

3290 }

3291 }

3292 }

3293

3294 CString FormatResourceMessage(uint32 resource_id, ...) {

3295 CString format;

3296 const bool is_loaded = !!format.LoadString(resource_id);

3297

3298 if (!is_loaded) {

3299 return CString();

3300 }

3301

3302 va_list arg_list;

3303 va_start(arg_list, resource_id);

3304

3305 CString formatted;

3306 formatted.FormatMessageV(format, &arg_list);

3307

3308 va_end(arg_list);

3309

3310 return formatted;

3311 }

3312

3313 CString FormatErrorCode(DWORD error_code) {

3314 CString error_code_string;

3315 if (FAILED(error_code)) {

3316 error_code_string.Format(_T("0x%08x"), error_code);

3317 } else {

3318 error_code_string.Format(_T("%u"), error_code);

3319 }

3320 return error_code_string;

3321 }

3322

3323 HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out) {

3324 ASSERT1(out);

3325

3326 out->Empty();

3327 if (str.IsEmpty()) {

3328 return S_OK;

3329 }

3330

3331 // Utf8 encode the Utf16 string first. Next urlencode it.

3332 CStringA utf8str = WideToUtf8(str);

3333 ASSERT1(!utf8str.IsEmpty());

3334 DWORD buf_len = INTERNET_MAX_URL_LENGTH;

3335 CStringA escaped_utf8_name;

3336 HRESULT hr = ::UrlEscapeA(utf8str,

3337 CStrBufA(escaped_utf8_name, buf_len),

3338 &buf_len,

3339 0);

3340 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);

3341 ASSERT1(escaped_utf8_name.GetLength() == static_cast<int>(buf_len));

3342 if (FAILED(hr)) {

3343 UTIL_LOG(LE, (_T("[UrlEscapeA failed][0x%08x]"), hr));

3344 return hr;

3345 }

3346

3347 *out = CString(escaped_utf8_name);

3348 return S_OK;

3349 }

3350

3351 HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out) {

3352 ASSERT1(out);

3353

3354 out->Empty();

3355 if (str.IsEmpty()) {

3356 return S_OK;

3357 }

3358

3359 // The value is a utf8 encoded url escaped string that is stored as a

3360 // unicode string. Because of this, it should contain only ascii chars.

3361 if (!ContainsOnlyAsciiChars(str)) {

3362 UTIL_LOG(LE, (_T("[String contains non ascii chars]")));

3363 return E_INVALIDARG;

3364 }

3365

3366 CStringA escaped_utf8_val = WideToAnsiDirect(str);

3367 DWORD buf_len = INTERNET_MAX_URL_LENGTH;

3368 CStringA unescaped_val;

3369 HRESULT hr = ::UrlUnescapeA(const_cast<char*>(escaped_utf8_val.GetString()),

3370 CStrBufA(unescaped_val, buf_len),

3371 &buf_len,

3372 0);

3373 ASSERT1(unescaped_val.GetLength() == static_cast<int>(buf_len));

3374 if (FAILED(hr)) {

3375 UTIL_LOG(LE, (_T("[UrlUnescapeA failed][0x%08x]"), hr));

3376 return hr;

3377 }

3378 ASSERT1(buf_len == static_cast<DWORD>(unescaped_val.GetLength()));

3379 ASSERT1(buf_len <= INTERNET_MAX_URL_LENGTH);

3380 CString app_name = Utf8ToWideChar(unescaped_val,

3381 unescaped_val.GetLength());

3382 if (app_name.IsEmpty()) {

3383 return E_INVALIDARG;

3384 }

3385

3386 *out = app_name;

3387 return S_OK;

3388 }

3389

3390 } // namespace omaha

3391

OLD	NEW

« no previous file with comments | « base/string.h ('k') | base/string_unittest.cc » ('j') | no next file with comments »