Index: base/debug/format.cc |
diff --git a/base/debug/format.cc b/base/debug/format.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..2582bbb61ab8072ae56115e3a98b5c7a3a651080 |
--- /dev/null |
+++ b/base/debug/format.cc |
@@ -0,0 +1,478 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include <limits> |
+ |
+#include "base/debug/format.h" |
+ |
+ |
+namespace base { |
+namespace debug { |
+ |
+// The code in this file is extremely careful to be async-signal-safe. |
+// |
+// Most obviously, we avoid calling any code that could dynamically allocate |
+// memory. Doing so would almost certainly result in bugs and dead-locks. |
+// We also avoid calling any other STL functions that could have unintended |
+// side-effects involving memory allocation or access to other shared |
+// resources. |
+// |
+// But on top of that, we also avoid calling other library functions, as many |
+// of them have the side-effect of calling getenv() (in order to deal with |
+// localization) or accessing errno. The latter sounds benign, but there are |
+// several execution contexts where it isn't even possible to safely read let |
+// alone write errno. |
+// |
+// The stated design goal of the Format() function is that it can be called |
+// from any context that can safely call C or C++ code (i.e. anything that |
+// doesn't require assembly code). |
+// |
+// For a brief overview of some but not all of the issues with async-signal- |
+// safety, refer to: |
+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html |
+ |
+namespace { |
+ |
// Adds |inc| to |*count|, saturating at std::numeric_limits<ssize_t>::max().
//
// Returns true if the addition was carried out exactly. Returns false, and
// pins |*count| to the maximum, if the sum would exceed the maximum (this
// includes the case where |*count| is already above it).
inline bool IncrementCount(size_t* count, size_t inc = 1) {
  const size_t kMaxCount =
      static_cast<size_t>(std::numeric_limits<ssize_t>::max());

  // |inc| has to be compared against the limit before it is subtracted from
  // it. All operands are unsigned, so "kMaxCount - inc" would wrap around to
  // a huge value for inc > kMaxCount, the overflow test would pass, and
  // |*count| would end up above the limit.
  if (inc > kMaxCount || *count > kMaxCount - inc) {
    *count = kMaxCount;
    return false;
  }
  *count += inc;
  return true;
}
+ |
+inline bool Out(char* buf, size_t sz, size_t* count, char ch) { |
+ if (*count + 1 < sz) { |
+ buf[*count] = ch; |
+ IncrementCount(count); |
+ return true; |
+ } |
+ IncrementCount(count); |
+ return false; |
+} |
+ |
+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, |
+ size_t len, char** ptr) { |
+ char *dst = *ptr; |
+ for (; padding > len; --padding) |
+ if (Out(buf, sz, count, pad)) |
+ ++dst; |
+ else { |
+ if (--padding) |
+ IncrementCount(count, padding-len); |
+ break; |
+ } |
+ *ptr = dst; |
+} |
+ |
+// POSIX doesn't define any async-signal safe function for converting |
+// an integer to ASCII. Define our own version. |
+// |
+// This also gives us the ability to make the function a little more powerful |
+// and have it deal with padding, with truncation, and with predicting the |
+// length of the untruncated output. |
+// |
+// IToASCII() converts an (optionally signed) integer to ASCII. It never |
+// writes more than "sz" bytes. Output will be truncated as needed, and a NUL |
+// character is appended, unless "sz" is zero. It returns the number of non-NUL |
+// bytes that would be output if no truncation had happened. |
+// |
+// It supports bases 2 through 16. Padding can be done with either '0' zeros |
+// or ' ' spaces. |
+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, |
+ int base, size_t padding, char pad) { |
+ // Sanity check for the "base". |
+ if (base < 2 || base > 16 || (sign && base != 10)) { |
+ if (static_cast<ssize_t>(sz) >= 1) |
+ buf[0] = '\000'; |
+ return 0; |
+ } |
+ |
+ // Handle negative numbers, if requested by caller. |
+ size_t count = 0; |
+ size_t n = 1; |
+ char* start = buf; |
+ int minint = 0; |
+ bool needs_minus = false; |
+ uint64_t num; |
+ if (sign && i < 0) { |
+ // If we aren't inserting padding, or if we are padding with '0' zeros, |
+ // we should insert the minus character now. It makes it easier to |
+ // correctly deal with truncated padded numbers. |
+ // On the other hand, if we are padding with ' ' spaces, we have to |
+ // delay outputting the minus character until later. |
+ if (padding <= 2 || pad == '0') { |
+ ++count; |
+ |
+ // Make sure we can write the '-' character. |
+ if (++n > sz) { |
+ if (sz > 0) |
+ *start = '\000'; |
+ } else |
+ *start++ = '-'; |
+ |
+ // Adjust padding, since we just output one character already. |
+ if (padding) |
+ --padding; |
+ } else |
+ needs_minus = true; |
+ |
+ // Turn our number positive. |
+ if (i == -i) { |
Jeffrey Yasskin
2013/07/30 23:13:53
That's undefined behavior for signed numbers (sorr
|
+ // The lowest-most negative integer needs special treatment. |
+ minint = 1; |
+ num = -(i + 1); |
+ } else { |
+ // "Normal" negative numbers are easy. |
+ num = -i; |
+ } |
+ } else |
+ num = i; |
+ |
+ // Loop until we have converted the entire number. Output at least one |
+ // character (i.e. '0'). |
+ char* ptr = start; |
+ bool started = false; |
+ do { |
+ // Sanity check. If padding is used to fill the entire address space, |
+ // don't allow more than MAXINT bytes. |
Jeffrey Yasskin
2013/07/30 23:13:53
MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA
|
+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { |
+ break; |
+ } |
+ |
+ // Make sure there is still enough space left in our output buffer. |
+ if (n == sz) { |
+ if (ptr > start) { |
+ // It is rare that we need to output a partial number. But if asked |
+ // to do so, we will still make sure we output the correct number of |
+ // leading digits. |
+ // memmove(start, start+1, --ptr - start) |
Jeffrey Yasskin
2013/07/30 23:13:53
This is weird. Why do you need to erase the first
|
+ --ptr; |
+ for (char* move = start; move < ptr; ++move) |
+ *move = move[1]; |
+ } else |
+ goto cannot_write_anything_but_nul; |
+ } else |
+ ++n; |
+ |
+ // Output the next digit and (if necessary) compensate for the lowest- |
Jeffrey Yasskin
2013/07/30 23:13:53
"lowest-most negative integer" should probably be
|
+ // most negative integer needing special treatment. This works because, |
+ // no matter the bit width of the integer, the lowest-most decimal |
+ // integer always ends in 2, 4, 6, or 8. |
+ if (n <= sz) { |
+ if (!num && started) |
+ if (needs_minus) { |
+ *ptr++ = '-'; |
+ needs_minus = false; |
+ } else |
+ *ptr++ = pad; |
+ else { |
+ started = true; |
+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") |
+ [num%base+minint]; |
+ } |
+ } |
+ |
+ cannot_write_anything_but_nul: |
+ minint = 0; |
+ num /= base; |
+ |
+ // Add padding, if requested. |
+ if (padding > 0) { |
+ --padding; |
+ |
+ // Performance optimization for when we are asked to output |
+ // excessive padding, but our output buffer is limited in size. |
+ // Even if we output a 128bit number in binary, we would never |
+ // write more than 130 characters. So, anything beyond this limit |
+ // and we can compute the result arithmetically. |
+ if (count > n && count - n > 130) { |
+ IncrementCount(&count, padding); |
+ padding = 0; |
+ } |
+ } |
+ } while (num || padding || needs_minus); |
+ |
+ // Terminate the output with a NUL character. |
+ if (sz > 0) |
+ *ptr = '\000'; |
+ |
+ // Conversion to ASCII actually resulted in the digits being in reverse |
+ // order. We can't easily generate them in forward order, as we can't tell |
+ // the number of characters needed until we are done converting. |
+ // So, now, we reverse the string (except for the possible '-' sign). |
+ while (--ptr > start) { |
+ char ch = *ptr; |
+ *ptr = *start; |
+ *start++ = ch; |
+ } |
+ return count; |
+} |
+ |
+} // anonymous namespace |
+ |
+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt, |
+ const Arg* args, const size_t max_args) { |
+ // Make sure we can write at least one NUL byte. |
+ if (static_cast<ssize_t>(sz) < 1) |
+ return -1; |
+ |
+ // Iterate over format string and interpret '%' arguments as they are |
+ // encountered. |
+ char* ptr = buf; |
+ size_t padding; |
+ char pad; |
+ size_t count = 0; |
+ for (unsigned int cur_arg = 0; |
+ *fmt && |
+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) { |
+ if (*fmt++ == '%') { |
+ padding = 0; |
+ pad = ' '; |
+ char ch = *fmt++; |
+ format_character_found: |
+ switch (ch) { |
+ case '0': case '1': case '2': case '3': case '4': |
+ case '5': case '6': case '7': case '8': case '9': |
+ // Found a width parameter. Convert to an integer value and store in |
+ // "padding". If the leading digit is a zero, change the padding |
+ // character from a space ' ' to a zero '0'. |
+ pad = ch == '0' ? '0' : ' '; |
+ for (;;) { |
+ const size_t max_padding = std::numeric_limits<ssize_t>::max(); |
+ if (padding > max_padding/10 || |
+ 10*padding > max_padding - (ch - '0')) { |
+ // Integer overflow detected. Skip the rest of the width until |
+ // we find the format character, then do the normal error handling. |
+ while ((ch = *fmt++) >= '0' && ch <= '9') { |
+ } |
+ goto fail_to_expand; |
+ } |
+ padding = 10*padding + ch - '0'; |
+ ch = *fmt++; |
+ if (ch < '0' || ch > '9') { |
+ // Reached the end of the width parameter. This is where the format |
+ // character is found. |
+ goto format_character_found; |
+ } |
+ } |
+ break; |
+ case 'c': { // Output an ASCII character. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) |
+ goto fail_to_expand; |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) |
+ goto fail_to_expand; |
+ |
+ // Apply padding, if needed. |
+ Pad(buf, sz, &count, ' ', padding, 1, &ptr); |
+ |
+ // Convert the argument to an ASCII character and output it. |
+ char ch = static_cast<char>(arg.i_); |
+ if (!ch) |
+ goto end_of_output_buffer; |
+ if (Out(buf, sz, &count, ch)) |
+ ++ptr; |
+ break; } |
+ case 'd': { // Output a signed or unsigned integer-like value. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) |
+ goto fail_to_expand; |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) |
+ goto fail_to_expand; |
+ |
+ // Our implementation of IToASCII() can handle all widths of data types |
+ // and can print both signed and unsigned values. |
+ IncrementCount(&count, |
+ IToASCII(arg.type_ == Arg::INT, false, arg.i_, |
+ ptr, sz - (ptr - buf), 10, padding, pad)); |
+ |
+ // Advance "ptr" to the end of the string that was just emitted. |
+ if (sz - (ptr - buf)) |
+ while (*ptr) |
+ ++ptr; |
+ break; } |
+ case 'x': // Output an unsigned hexadecimal value. |
+ case 'X': |
+ case 'p': { // Output a pointer value. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) |
+ goto fail_to_expand; |
+ |
+ const Arg& arg = args[cur_arg++]; |
+ int64_t i; |
+ switch (ch) { |
+ case 'x': // Hexadecimal values are available for integer-like args. |
+ case 'X': |
+ // Check that the argument has the expected type. |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) |
+ goto fail_to_expand; |
+ i = arg.i_; |
+ |
+ // The Arg() constructor automatically performed sign expansion on |
+ // signed parameters. This is great when outputting a %d decimal |
+ // number, but can result in unexpected leading 0xFF bytes when |
+ // outputting a %c hexadecimal number. Mask bits, if necessary. |
+ if (arg.type_ == Arg::INT && arg.width_ < 8) |
+ i &= ~(static_cast<int64_t>(-1) << (8*arg.width_)); |
Jeffrey Yasskin
2013/07/30 23:13:53
Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"
|
+ break; |
+ default: |
+ // Pointer values require an actual pointer or a string. |
+ if (arg.type_ == Arg::POINTER) |
+ i = reinterpret_cast<uintptr_t>(arg.ptr_); |
+ else if (arg.type_ == Arg::STRING) |
+ i = reinterpret_cast<uintptr_t>(arg.s_); |
+ else |
+ goto fail_to_expand; |
+ |
+ // Pointers always include the "0x" prefix. This affects padding. |
+ if (padding) { |
+ if (pad == ' ') { |
+ // Predict the number of hex digits (including "0x" prefix) that |
+ // will be output for this address when it is converted to ASCII. |
+ size_t chars = 2; |
+ uint64_t j = i; |
+ do { |
+ ++chars; |
+ j >>= 4; |
+ } while (j); |
+ |
+ // Output the necessary number of space characters to perform |
+ // padding. We can't rely on IToASCII() to do that for us, as it |
+ // would incorrectly add padding _after_ the "0x" prefix. |
+ Pad(buf, sz, &count, pad, padding, chars, &ptr); |
+ |
+ // Inform itoa_r() that it no longer needs to handle the padding. |
+ padding = 0; |
+ } else { |
+ // Adjust for the two-character "0x" prefix. |
+ padding = padding >= 2 ? padding - 2 : 0; |
+ } |
+ } |
+ |
+ // Insert "0x" prefix, if there is still sufficient space in the |
+ // output buffer. |
+ if (Out(buf, sz, &count, '0')) |
+ ++ptr; |
+ if (Out(buf, sz, &count, 'x')) |
+ ++ptr; |
+ break; |
+ } |
+ |
+ // No matter what data type this value originated from, print it as |
+ // a regular hexadecimal number. |
+ IncrementCount(&count, |
+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf), |
+ 16, padding, pad)); |
+ |
+ // Advance "ptr" to the end of the string that was just emitted. |
+ if (sz - (ptr - buf)) |
+ while (*ptr) |
+ ++ptr; |
+ break; } |
+ case 's': { |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) |
+ goto fail_to_expand; |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ if (arg.type_ != Arg::STRING) |
+ goto fail_to_expand; |
+ |
+ // Apply padding, if needed. This requires us to first check the |
+ // length of the string that we are outputting. |
+ if (padding) { |
+ size_t len = 0; |
+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; ) |
+ ++len; |
+ Pad(buf, sz, &count, ' ', padding, len, &ptr); |
+ } |
+ |
+ // Printing a string involves nothing more than copying it into the |
+ // output buffer and making sure we don't output more bytes than |
+ // available space. |
+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; ) |
+ if (Out(buf, sz, &count, *src++)) |
+ ++ptr; |
+ break; } |
+ case '%': |
+ // Quoted percent '%' character. |
+ goto copy_verbatim; |
+ fail_to_expand: |
+ // C++ gives us tools to do type checking -- something that snprintf() |
+ // could never really do. So, whenever we see arguments that don't |
+ // match up with the format string, we refuse to output them. But |
+ // since we have to be extremely conservative about being async- |
+ // signal-safe, we are limited in the type of error handling that we |
Jeffrey Yasskin
2013/07/30 23:13:53
Is abort() or "*(volatile char*)0 = 0" ok for debu
|
+ // can do. So, all we do is pass the format string unchanged. That |
+ // should eventually get the user's attention. |
+ default: |
+ // Unknown or unsupported format character. Just copy verbatim to |
+ // output. |
+ if (Out(buf, sz, &count, '%')) |
+ ++ptr; |
+ if (!ch) |
+ goto end_of_format_string; |
+ if (Out(buf, sz, &count, ch)) |
+ ++ptr; |
+ break; |
+ } |
+ } else { |
+ copy_verbatim: |
+ if (Out(buf, sz, &count, fmt[-1])) |
+ ++ptr; |
+ } |
+ } |
+ end_of_format_string: |
+ end_of_output_buffer: |
+ *ptr = '\000'; |
+ IncrementCount(&count); |
+ return static_cast<ssize_t>(count)-1; |
+} |
+ |
+ssize_t FormatN(char* buf, size_t N, const char* fmt) { |
+ // Make sure we can write at least one NUL byte. |
+ ssize_t n = static_cast<ssize_t>(N); |
+ if (n < 1) |
+ return -1; |
+ size_t count = 0; |
+ |
+ // In the slow-path, we deal with errors by copying the contents of |
+ // "fmt" unexpanded. This means, if there are no arguments passed, the |
+ // Format() function always degenerates to version of strncpy() that |
+ // de-duplicates '%' characters. |
+ char* dst = buf; |
+ const char* src = fmt; |
+ for (; *src; ++src) { |
+ char ch = *src; |
+ if (!IncrementCount(&count) && n > 1) { |
+ --dst; |
+ break; |
+ } |
+ if (n > 1) { |
+ --n; |
+ *dst++ = ch; |
+ } |
+ if (ch == '%' && src[1] == '%') |
+ ++src; |
+ } |
+ IncrementCount(&count); |
+ *dst = '\000'; |
+ return static_cast<ssize_t>(count)-1; |
+} |
+ |
+} // namespace debug |
+} // namespace base |