Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <limits> | |
| 6 | |
| 7 #include "base/debug/format.h" | |
| 8 | |
| 9 | |
| 10 namespace base { | |
| 11 namespace debug { | |
| 12 | |
| 13 // The code in this file is extremely careful to be async-signal-safe. | |
| 14 // | |
| 15 // Most obviously, we avoid calling any code that could dynamically allocate | |
| 16 // memory. Doing so would almost certainly result in bugs and dead-locks. | |
| 17 // We also avoid calling any other STL functions that could have unintended | |
| 18 // side-effects involving memory allocation or access to other shared | |
| 19 // resources. | |
| 20 // | |
| 21 // But on top of that, we also avoid calling other library functions, as many | |
| 22 // of them have the side-effect of calling getenv() (in order to deal with | |
| 23 // localization) or accessing errno. The latter sounds benign, but there are | |
| 24 // several execution contexts where it isn't even possible to safely read let | |
| 25 // alone write errno. | |
| 26 // | |
| 27 // The stated design goal of the Format() function is that it can be called | |
| 28 // from any context that can safely call C or C++ code (i.e. anything that | |
| 29 // doesn't require assembly code). | |
| 30 // | |
| 31 // For a brief overview of some but not all of the issues with async-signal- | |
| 32 // safety, refer to: | |
| 33 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html | |
| 34 | |
| 35 namespace { | |
| 36 | |
| 37 inline bool IncrementCount(size_t* count, size_t inc = 1) { | |
| 38 if (*count > std::numeric_limits<ssize_t>::max() - inc) { | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
I think this will give the wrong answer if inc>std
| |
| 39 *count = std::numeric_limits<ssize_t>::max(); | |
| 40 return false; | |
| 41 } else { | |
| 42 *count += inc; | |
| 43 return true; | |
| 44 } | |
| 45 } | |
| 46 | |
| 47 inline bool Out(char* buf, size_t sz, size_t* count, char ch) { | |
| 48 if (*count + 1 < sz) { | |
| 49 buf[*count] = ch; | |
| 50 IncrementCount(count); | |
| 51 return true; | |
| 52 } | |
| 53 IncrementCount(count); | |
| 54 return false; | |
| 55 } | |
| 56 | |
| 57 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, | |
| 58 size_t len, char** ptr) { | |
| 59 char *dst = *ptr; | |
| 60 for (; padding > len; --padding) | |
| 61 if (Out(buf, sz, count, pad)) | |
| 62 ++dst; | |
| 63 else { | |
| 64 if (--padding) | |
| 65 IncrementCount(count, padding-len); | |
| 66 break; | |
| 67 } | |
| 68 *ptr = dst; | |
| 69 } | |
| 70 | |
| 71 // POSIX doesn't define any async-signal safe function for converting | |
| 72 // an integer to ASCII. Define our own version. | |
| 73 // | |
| 74 // This also gives us the ability to make the function a little more powerful | |
| 75 // and have it deal with padding, with truncation, and with predicting the | |
| 76 // length of the untruncated output. | |
| 77 // | |
| 78 // IToASCII() converts an (optionally signed) integer to ASCII. It never | |
| 79 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL | |
| 80 // character is appended, unless "sz" is zero. It returns the number of non-NUL | |
| 81 // bytes that would be output if no truncation had happened. | |
| 82 // | |
| 83 // It supports bases 2 through 16. Padding can be done with either '0' zeros | |
| 84 // or ' ' spaces. | |
| 85 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, | |
| 86 int base, size_t padding, char pad) { | |
| 87 // Sanity check for the "base". | |
| 88 if (base < 2 || base > 16 || (sign && base != 10)) { | |
| 89 if (static_cast<ssize_t>(sz) >= 1) | |
| 90 buf[0] = '\000'; | |
| 91 return 0; | |
| 92 } | |
| 93 | |
| 94 // Handle negative numbers, if requested by caller. | |
| 95 size_t count = 0; | |
| 96 size_t n = 1; | |
| 97 char* start = buf; | |
| 98 int minint = 0; | |
| 99 bool needs_minus = false; | |
| 100 uint64_t num; | |
| 101 if (sign && i < 0) { | |
| 102 // If we aren't inserting padding, or if we are padding with '0' zeros, | |
| 103 // we should insert the minus character now. It makes it easier to | |
| 104 // correctly deal with truncated padded numbers. | |
| 105 // On the other hand, if we are padding with ' ' spaces, we have to | |
| 106 // delay outputting the minus character until later. | |
| 107 if (padding <= 2 || pad == '0') { | |
| 108 ++count; | |
| 109 | |
| 110 // Make sure we can write the '-' character. | |
| 111 if (++n > sz) { | |
| 112 if (sz > 0) | |
| 113 *start = '\000'; | |
| 114 } else | |
| 115 *start++ = '-'; | |
| 116 | |
| 117 // Adjust padding, since we just output one character already. | |
| 118 if (padding) | |
| 119 --padding; | |
| 120 } else | |
| 121 needs_minus = true; | |
| 122 | |
| 123 // Turn our number positive. | |
| 124 if (i == -i) { | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
That's undefined behavior for signed numbers (sorr
| |
| 125 // The lowest-most negative integer needs special treatment. | |
| 126 minint = 1; | |
| 127 num = -(i + 1); | |
| 128 } else { | |
| 129 // "Normal" negative numbers are easy. | |
| 130 num = -i; | |
| 131 } | |
| 132 } else | |
| 133 num = i; | |
| 134 | |
| 135 // Loop until we have converted the entire number. Output at least one | |
| 136 // character (i.e. '0'). | |
| 137 char* ptr = start; | |
| 138 bool started = false; | |
| 139 do { | |
| 140 // Sanity check. If padding is used to fill the entire address space, | |
| 141 // don't allow more than MAXINT bytes. | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA
| |
| 142 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { | |
| 143 break; | |
| 144 } | |
| 145 | |
| 146 // Make sure there is still enough space left in our output buffer. | |
| 147 if (n == sz) { | |
| 148 if (ptr > start) { | |
| 149 // It is rare that we need to output a partial number. But if asked | |
| 150 // to do so, we will still make sure we output the correct number of | |
| 151 // leading digits. | |
| 152 // memmove(start, start+1, --ptr - start) | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
This is weird. Why do you need to erase the first
| |
| 153 --ptr; | |
| 154 for (char* move = start; move < ptr; ++move) | |
| 155 *move = move[1]; | |
| 156 } else | |
| 157 goto cannot_write_anything_but_nul; | |
| 158 } else | |
| 159 ++n; | |
| 160 | |
| 161 // Output the next digit and (if necessary) compensate for the lowest- | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
"lowest-most negative integer" should probably be
| |
| 162 // most negative integer needing special treatment. This works because, | |
| 163 // no matter the bit width of the integer, the lowest-most decimal | |
| 164 // integer always ends in 2, 4, 6, or 8. | |
| 165 if (n <= sz) { | |
| 166 if (!num && started) | |
| 167 if (needs_minus) { | |
| 168 *ptr++ = '-'; | |
| 169 needs_minus = false; | |
| 170 } else | |
| 171 *ptr++ = pad; | |
| 172 else { | |
| 173 started = true; | |
| 174 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") | |
| 175 [num%base+minint]; | |
| 176 } | |
| 177 } | |
| 178 | |
| 179 cannot_write_anything_but_nul: | |
| 180 minint = 0; | |
| 181 num /= base; | |
| 182 | |
| 183 // Add padding, if requested. | |
| 184 if (padding > 0) { | |
| 185 --padding; | |
| 186 | |
| 187 // Performance optimization for when we are asked to output | |
| 188 // excessive padding, but our output buffer is limited in size. | |
| 189 // Even if we output a 128bit number in binary, we would never | |
| 190 // write more than 130 characters. So, for anything beyond this limit, | |
| 191 // we can compute the result arithmetically. | |
| 192 if (count > n && count - n > 130) { | |
| 193 IncrementCount(&count, padding); | |
| 194 padding = 0; | |
| 195 } | |
| 196 } | |
| 197 } while (num || padding || needs_minus); | |
| 198 | |
| 199 // Terminate the output with a NUL character. | |
| 200 if (sz > 0) | |
| 201 *ptr = '\000'; | |
| 202 | |
| 203 // Conversion to ASCII actually resulted in the digits being in reverse | |
| 204 // order. We can't easily generate them in forward order, as we can't tell | |
| 205 // the number of characters needed until we are done converting. | |
| 206 // So, now, we reverse the string (except for the possible '-' sign). | |
| 207 while (--ptr > start) { | |
| 208 char ch = *ptr; | |
| 209 *ptr = *start; | |
| 210 *start++ = ch; | |
| 211 } | |
| 212 return count; | |
| 213 } | |
| 214 | |
| 215 } // anonymous namespace | |
| 216 | |
| 217 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt, | |
| 218 const Arg* args, const size_t max_args) { | |
| 219 // Make sure we can write at least one NUL byte. | |
| 220 if (static_cast<ssize_t>(sz) < 1) | |
| 221 return -1; | |
| 222 | |
| 223 // Iterate over format string and interpret '%' arguments as they are | |
| 224 // encountered. | |
| 225 char* ptr = buf; | |
| 226 size_t padding; | |
| 227 char pad; | |
| 228 size_t count = 0; | |
| 229 for (unsigned int cur_arg = 0; | |
| 230 *fmt && | |
| 231 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) { | |
| 232 if (*fmt++ == '%') { | |
| 233 padding = 0; | |
| 234 pad = ' '; | |
| 235 char ch = *fmt++; | |
| 236 format_character_found: | |
| 237 switch (ch) { | |
| 238 case '0': case '1': case '2': case '3': case '4': | |
| 239 case '5': case '6': case '7': case '8': case '9': | |
| 240 // Found a width parameter. Convert to an integer value and store in | |
| 241 // "padding". If the leading digit is a zero, change the padding | |
| 242 // character from a space ' ' to a zero '0'. | |
| 243 pad = ch == '0' ? '0' : ' '; | |
| 244 for (;;) { | |
| 245 const size_t max_padding = std::numeric_limits<ssize_t>::max(); | |
| 246 if (padding > max_padding/10 || | |
| 247 10*padding > max_padding - (ch - '0')) { | |
| 248 // Integer overflow detected. Skip the rest of the width until | |
| 249 // we find the format character, then do the normal error handling. | |
| 250 while ((ch = *fmt++) >= '0' && ch <= '9') { | |
| 251 } | |
| 252 goto fail_to_expand; | |
| 253 } | |
| 254 padding = 10*padding + ch - '0'; | |
| 255 ch = *fmt++; | |
| 256 if (ch < '0' || ch > '9') { | |
| 257 // Reached the end of the width parameter. This is where the format | |
| 258 // character is found. | |
| 259 goto format_character_found; | |
| 260 } | |
| 261 } | |
| 262 break; | |
| 263 case 'c': { // Output an ASCII character. | |
| 264 // Check that there are arguments left to be inserted. | |
| 265 if (cur_arg >= max_args) | |
| 266 goto fail_to_expand; | |
| 267 | |
| 268 // Check that the argument has the expected type. | |
| 269 const Arg& arg = args[cur_arg++]; | |
| 270 if (arg.type_ != Arg::INT && | |
| 271 arg.type_ != Arg::UINT) | |
| 272 goto fail_to_expand; | |
| 273 | |
| 274 // Apply padding, if needed. | |
| 275 Pad(buf, sz, &count, ' ', padding, 1, &ptr); | |
| 276 | |
| 277 // Convert the argument to an ASCII character and output it. | |
| 278 char ch = static_cast<char>(arg.i_); | |
| 279 if (!ch) | |
| 280 goto end_of_output_buffer; | |
| 281 if (Out(buf, sz, &count, ch)) | |
| 282 ++ptr; | |
| 283 break; } | |
| 284 case 'd': { // Output a signed or unsigned integer-like value. | |
| 285 // Check that there are arguments left to be inserted. | |
| 286 if (cur_arg >= max_args) | |
| 287 goto fail_to_expand; | |
| 288 | |
| 289 // Check that the argument has the expected type. | |
| 290 const Arg& arg = args[cur_arg++]; | |
| 291 if (arg.type_ != Arg::INT && | |
| 292 arg.type_ != Arg::UINT) | |
| 293 goto fail_to_expand; | |
| 294 | |
| 295 // Our implementation of IToASCII() can handle all widths of data types | |
| 296 // and can print both signed and unsigned values. | |
| 297 IncrementCount(&count, | |
| 298 IToASCII(arg.type_ == Arg::INT, false, arg.i_, | |
| 299 ptr, sz - (ptr - buf), 10, padding, pad)); | |
| 300 | |
| 301 // Advance "ptr" to the end of the string that was just emitted. | |
| 302 if (sz - (ptr - buf)) | |
| 303 while (*ptr) | |
| 304 ++ptr; | |
| 305 break; } | |
| 306 case 'x': // Output an unsigned hexadecimal value. | |
| 307 case 'X': | |
| 308 case 'p': { // Output a pointer value. | |
| 309 // Check that there are arguments left to be inserted. | |
| 310 if (cur_arg >= max_args) | |
| 311 goto fail_to_expand; | |
| 312 | |
| 313 const Arg& arg = args[cur_arg++]; | |
| 314 int64_t i; | |
| 315 switch (ch) { | |
| 316 case 'x': // Hexadecimal values are available for integer-like args. | |
| 317 case 'X': | |
| 318 // Check that the argument has the expected type. | |
| 319 if (arg.type_ != Arg::INT && | |
| 320 arg.type_ != Arg::UINT) | |
| 321 goto fail_to_expand; | |
| 322 i = arg.i_; | |
| 323 | |
| 324 // The Arg() constructor automatically performed sign expansion on | |
| 325 // signed parameters. This is great when outputting a %d decimal | |
| 326 // number, but can result in unexpected leading 0xFF bytes when | |
| 327 // outputting a %x hexadecimal number. Mask bits, if necessary. | |
| 328 if (arg.type_ == Arg::INT && arg.width_ < 8) | |
| 329 i &= ~(static_cast<int64_t>(-1) << (8*arg.width_)); | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"
| |
| 330 break; | |
| 331 default: | |
| 332 // Pointer values require an actual pointer or a string. | |
| 333 if (arg.type_ == Arg::POINTER) | |
| 334 i = reinterpret_cast<uintptr_t>(arg.ptr_); | |
| 335 else if (arg.type_ == Arg::STRING) | |
| 336 i = reinterpret_cast<uintptr_t>(arg.s_); | |
| 337 else | |
| 338 goto fail_to_expand; | |
| 339 | |
| 340 // Pointers always include the "0x" prefix. This affects padding. | |
| 341 if (padding) { | |
| 342 if (pad == ' ') { | |
| 343 // Predict the number of hex digits (including "0x" prefix) that | |
| 344 // will be output for this address when it is converted to ASCII. | |
| 345 size_t chars = 2; | |
| 346 uint64_t j = i; | |
| 347 do { | |
| 348 ++chars; | |
| 349 j >>= 4; | |
| 350 } while (j); | |
| 351 | |
| 352 // Output the necessary number of space characters to perform | |
| 353 // padding. We can't rely on IToASCII() to do that for us, as it | |
| 354 // would incorrectly add padding _after_ the "0x" prefix. | |
| 355 Pad(buf, sz, &count, pad, padding, chars, &ptr); | |
| 356 | |
| 357 // Inform IToASCII() that it no longer needs to handle the padding. | |
| 358 padding = 0; | |
| 359 } else { | |
| 360 // Adjust for the two-character "0x" prefix. | |
| 361 padding = padding >= 2 ? padding - 2 : 0; | |
| 362 } | |
| 363 } | |
| 364 | |
| 365 // Insert "0x" prefix, if there is still sufficient space in the | |
| 366 // output buffer. | |
| 367 if (Out(buf, sz, &count, '0')) | |
| 368 ++ptr; | |
| 369 if (Out(buf, sz, &count, 'x')) | |
| 370 ++ptr; | |
| 371 break; | |
| 372 } | |
| 373 | |
| 374 // No matter what data type this value originated from, print it as | |
| 375 // a regular hexadecimal number. | |
| 376 IncrementCount(&count, | |
| 377 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf), | |
| 378 16, padding, pad)); | |
| 379 | |
| 380 // Advance "ptr" to the end of the string that was just emitted. | |
| 381 if (sz - (ptr - buf)) | |
| 382 while (*ptr) | |
| 383 ++ptr; | |
| 384 break; } | |
| 385 case 's': { | |
| 386 // Check that there are arguments left to be inserted. | |
| 387 if (cur_arg >= max_args) | |
| 388 goto fail_to_expand; | |
| 389 | |
| 390 // Check that the argument has the expected type. | |
| 391 const Arg& arg = args[cur_arg++]; | |
| 392 if (arg.type_ != Arg::STRING) | |
| 393 goto fail_to_expand; | |
| 394 | |
| 395 // Apply padding, if needed. This requires us to first check the | |
| 396 // length of the string that we are outputting. | |
| 397 if (padding) { | |
| 398 size_t len = 0; | |
| 399 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; ) | |
| 400 ++len; | |
| 401 Pad(buf, sz, &count, ' ', padding, len, &ptr); | |
| 402 } | |
| 403 | |
| 404 // Printing a string involves nothing more than copying it into the | |
| 405 // output buffer and making sure we don't output more bytes than | |
| 406 // available space. | |
| 407 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; ) | |
| 408 if (Out(buf, sz, &count, *src++)) | |
| 409 ++ptr; | |
| 410 break; } | |
| 411 case '%': | |
| 412 // Quoted percent '%' character. | |
| 413 goto copy_verbatim; | |
| 414 fail_to_expand: | |
| 415 // C++ gives us tools to do type checking -- something that snprintf() | |
| 416 // could never really do. So, whenever we see arguments that don't | |
| 417 // match up with the format string, we refuse to output them. But | |
| 418 // since we have to be extremely conservative about being async- | |
| 419 // signal-safe, we are limited in the type of error handling that we | |
|
Jeffrey Yasskin
2013/07/30 23:13:53
Is abort() or "*(volatile char*)0 = 0" ok for debu
| |
| 420 // can do. So, all we do is pass the format string unchanged. That | |
| 421 // should eventually get the user's attention. | |
| 422 default: | |
| 423 // Unknown or unsupported format character. Just copy verbatim to | |
| 424 // output. | |
| 425 if (Out(buf, sz, &count, '%')) | |
| 426 ++ptr; | |
| 427 if (!ch) | |
| 428 goto end_of_format_string; | |
| 429 if (Out(buf, sz, &count, ch)) | |
| 430 ++ptr; | |
| 431 break; | |
| 432 } | |
| 433 } else { | |
| 434 copy_verbatim: | |
| 435 if (Out(buf, sz, &count, fmt[-1])) | |
| 436 ++ptr; | |
| 437 } | |
| 438 } | |
| 439 end_of_format_string: | |
| 440 end_of_output_buffer: | |
| 441 *ptr = '\000'; | |
| 442 IncrementCount(&count); | |
| 443 return static_cast<ssize_t>(count)-1; | |
| 444 } | |
| 445 | |
| 446 ssize_t FormatN(char* buf, size_t N, const char* fmt) { | |
| 447 // Make sure we can write at least one NUL byte. | |
| 448 ssize_t n = static_cast<ssize_t>(N); | |
| 449 if (n < 1) | |
| 450 return -1; | |
| 451 size_t count = 0; | |
| 452 | |
| 453 // In the slow-path, we deal with errors by copying the contents of | |
| 454 // "fmt" unexpanded. This means, if there are no arguments passed, the | |
| 455 // Format() function always degenerates to a version of strncpy() that | |
| 456 // de-duplicates '%' characters. | |
| 457 char* dst = buf; | |
| 458 const char* src = fmt; | |
| 459 for (; *src; ++src) { | |
| 460 char ch = *src; | |
| 461 if (!IncrementCount(&count) && n > 1) { | |
| 462 --dst; | |
| 463 break; | |
| 464 } | |
| 465 if (n > 1) { | |
| 466 --n; | |
| 467 *dst++ = ch; | |
| 468 } | |
| 469 if (ch == '%' && src[1] == '%') | |
| 470 ++src; | |
| 471 } | |
| 472 IncrementCount(&count); | |
| 473 *dst = '\000'; | |
| 474 return static_cast<ssize_t>(count)-1; | |
| 475 } | |
| 476 | |
| 477 } // namespace debug | |
| 478 } // namespace base | |
| OLD | NEW |