base/debug/format.cc - Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion

Unified Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed reference value in unittest Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/debug/format.cc

diff --git a/base/debug/format.cc b/base/debug/format.cc

new file mode 100644

index 0000000000000000000000000000000000000000..2582bbb61ab8072ae56115e3a98b5c7a3a651080

--- /dev/null

+++ b/base/debug/format.cc

@@ -0,0 +1,478 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include <limits>

+#include "base/debug/format.h"

+namespace base {

+namespace debug {

+// The code in this file is extremely careful to be async-signal-safe.

+//

+// Most obviously, we avoid calling any code that could dynamically allocate

+// memory. Doing so would almost certainly result in bugs and dead-locks.

+// We also avoid calling any other STL functions that could have unintended

+// side-effects involving memory allocation or access to other shared

+// resources.

+//

+// But on top of that, we also avoid calling other library functions, as many

+// of them have the side-effect of calling getenv() (in order to deal with

+// localization) or accessing errno. The latter sounds benign, but there are

+// several execution contexts where it isn't even possible to safely read let

+// alone write errno.

+//

+// The stated design goal of the Format() function is that it can be called

+// from any context that can safely call C or C++ code (i.e. anything that

+// doesn't require assembly code).

+//

+// For a brief overview of some but not all of the issues with async-signal-

+// safety, refer to:

+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html

+namespace {

+inline bool IncrementCount(size_t* count, size_t inc = 1) {

+ if (*count > std::numeric_limits<ssize_t>::max() - inc) {

Jeffrey Yasskin 2013/07/30 23:13:53 I think this will give the wrong answer if inc>std

+ *count = std::numeric_limits<ssize_t>::max();

+ return false;

+ } else {

+ *count += inc;

+ return true;

+ }

+inline bool Out(char* buf, size_t sz, size_t* count, char ch) {

+ if (*count + 1 < sz) {

+ buf[*count] = ch;

+ IncrementCount(count);

+ return true;

+ }

+ IncrementCount(count);

+ return false;

+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,

+ size_t len, char** ptr) {

+ char *dst = *ptr;

+ for (; padding > len; --padding)

+ if (Out(buf, sz, count, pad))

+ ++dst;

+ else {

+ if (--padding)

+ IncrementCount(count, padding-len);

+ break;

+ }

+ *ptr = dst;

+// POSIX doesn't define any async-signal safe function for converting

+// an integer to ASCII. Define our own version.

+//

+// This also gives us the ability to make the function a little more powerful

+// and have it deal with padding, with truncation, and with predicting the

+// length of the untruncated output.

+//

+// IToASCII() converts an (optionally signed) integer to ASCII. It never

+// writes more than "sz" bytes. Output will be truncated as needed, and a NUL

+// character is appended, unless "sz" is zero. It returns the number of non-NUL

+// bytes that would be output if no truncation had happened.

+//

+// It supports bases 2 through 16. Padding can be done with either '0' zeros

+// or ' ' spaces.

+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,

+ int base, size_t padding, char pad) {

+ // Sanity check for the "base".

+ if (base < 2 || base > 16 || (sign && base != 10)) {

+ if (static_cast<ssize_t>(sz) >= 1)

+ buf[0] = '\000';

+ return 0;

+ }

+ // Handle negative numbers, if requested by caller.

+ size_t count = 0;

+ size_t n = 1;

+ char* start = buf;

+ int minint = 0;

+ bool needs_minus = false;

+ uint64_t num;

+ if (sign && i < 0) {

+ // If we aren't inserting padding, or if we are padding with '0' zeros,

+ // we should insert the minus character now. It makes it easier to

+ // correctly deal with truncated padded numbers.

+ // On the other hand, if we are padding with ' ' spaces, we have to

+ // delay outputting the minus character until later.

+ if (padding <= 2 || pad == '0') {

+ ++count;

+ // Make sure we can write the '-' character.

+ if (++n > sz) {

+ if (sz > 0)

+ *start = '\000';

+ } else

+ *start++ = '-';

+ // Adjust padding, since we just output one character already.

+ if (padding)

+ --padding;

+ } else

+ needs_minus = true;

+ // Turn our number positive.

+ if (i == -i) {

Jeffrey Yasskin 2013/07/30 23:13:53 That's undefined behavior for signed numbers (sorr

+ // The lowest-most negative integer needs special treatment.

+ minint = 1;

+ num = -(i + 1);

+ } else {

+ // "Normal" negative numbers are easy.

+ num = -i;

+ }

+ } else

+ num = i;

+ // Loop until we have converted the entire number. Output at least one

+ // character (i.e. '0').

+ char* ptr = start;

+ bool started = false;

+ do {

+ // Sanity check. If padding is used to fill the entire address space,

+ // don't allow more than MAXINT bytes.

Jeffrey Yasskin 2013/07/30 23:13:53 MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA

+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {

+ break;

+ }

+ // Make sure there is still enough space left in our output buffer.

+ if (n == sz) {

+ if (ptr > start) {

+ // It is rare that we need to output a partial number. But if asked

+ // to do so, we will still make sure we output the correct number of

+ // leading digits.

+ // memmove(start, start+1, --ptr - start)

Jeffrey Yasskin 2013/07/30 23:13:53 This is weird. Why do you need to erase the first

+ --ptr;

+ for (char* move = start; move < ptr; ++move)

+ *move = move[1];

+ } else

+ goto cannot_write_anything_but_nul;

+ } else

+ ++n;

+ // Output the next digit and (if necessary) compensate for the lowest-

Jeffrey Yasskin 2013/07/30 23:13:53 "lowest-most negative integer" should probably be

+ // most negative integer needing special treatment. This works because,

+ // no matter the bit width of the integer, the lowest-most decimal

+ // integer always ends in 2, 4, 6, or 8.

+ if (n <= sz) {

+ if (!num && started)

+ if (needs_minus) {

+ *ptr++ = '-';

+ needs_minus = false;

+ } else

+ *ptr++ = pad;

+ else {

+ started = true;

+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")

+ [num%base+minint];

+ }

+ cannot_write_anything_but_nul:

+ minint = 0;

+ num /= base;

+ // Add padding, if requested.

+ if (padding > 0) {

+ --padding;

+ // Performance optimization for when we are asked to output

+ // excessive padding, but our output buffer is limited in size.

+ // Even if we output a 128bit number in binary, we would never

+ // write more than 130 characters. So, anything beyond this limit

+ // and we can compute the result arithmetically.

+ if (count > n && count - n > 130) {

+ IncrementCount(&count, padding);

+ padding = 0;

+ }

+ } while (num || padding || needs_minus);

+ // Terminate the output with a NUL character.

+ if (sz > 0)

+ *ptr = '\000';

+ // Conversion to ASCII actually resulted in the digits being in reverse

+ // order. We can't easily generate them in forward order, as we can't tell

+ // the number of characters needed until we are done converting.

+ // So, now, we reverse the string (except for the possible '-' sign).

+ while (--ptr > start) {

+ char ch = *ptr;

+ *ptr = *start;

+ *start++ = ch;

+ }

+ return count;

+} // anonymous namespace

+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,

+ const Arg* args, const size_t max_args) {

+ // Make sure we can write at least one NUL byte.

+ if (static_cast<ssize_t>(sz) < 1)

+ return -1;

+ // Iterate over format string and interpret '%' arguments as they are

+ // encountered.

+ char* ptr = buf;

+ size_t padding;

+ char pad;

+ size_t count = 0;

+ for (unsigned int cur_arg = 0;

+ *fmt &&

+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {

+ if (*fmt++ == '%') {

+ padding = 0;

+ pad = ' ';

+ char ch = *fmt++;

+ format_character_found:

+ switch (ch) {

+ case '0': case '1': case '2': case '3': case '4':

+ case '5': case '6': case '7': case '8': case '9':

+ // Found a width parameter. Convert to an integer value and store in

+ // "padding". If the leading digit is a zero, change the padding

+ // character from a space ' ' to a zero '0'.

+ pad = ch == '0' ? '0' : ' ';

+ for (;;) {

+ const size_t max_padding = std::numeric_limits<ssize_t>::max();

+ if (padding > max_padding/10 ||

+ 10*padding > max_padding - (ch - '0')) {

+ // Integer overflow detected. Skip the rest of the width until

+ // we find the format character, then do the normal error handling.

+ while ((ch = *fmt++) >= '0' && ch <= '9') {

+ }

+ goto fail_to_expand;

+ }

+ padding = 10*padding + ch - '0';

+ ch = *fmt++;

+ if (ch < '0' || ch > '9') {

+ // Reached the end of the width parameter. This is where the format

+ // character is found.

+ goto format_character_found;

+ }

+ break;

+ case 'c': { // Output an ASCII character.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args)

+ goto fail_to_expand;

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT)

+ goto fail_to_expand;

+ // Apply padding, if needed.

+ Pad(buf, sz, &count, ' ', padding, 1, &ptr);

+ // Convert the argument to an ASCII character and output it.

+ char ch = static_cast<char>(arg.i_);

+ if (!ch)

+ goto end_of_output_buffer;

+ if (Out(buf, sz, &count, ch))

+ ++ptr;

+ break; }

+ case 'd': { // Output a signed or unsigned integer-like value.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args)

+ goto fail_to_expand;

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT)

+ goto fail_to_expand;

+ // Our implementation of IToASCII() can handle all widths of data types

+ // and can print both signed and unsigned values.

+ IncrementCount(&count,

+ IToASCII(arg.type_ == Arg::INT, false, arg.i_,

+ ptr, sz - (ptr - buf), 10, padding, pad));

+ // Advance "ptr" to the end of the string that was just emitted.

+ if (sz - (ptr - buf))

+ while (*ptr)

+ ++ptr;

+ break; }

+ case 'x': // Output an unsigned hexadecimal value.

+ case 'X':

+ case 'p': { // Output a pointer value.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args)

+ goto fail_to_expand;

+ const Arg& arg = args[cur_arg++];

+ int64_t i;

+ switch (ch) {

+ case 'x': // Hexadecimal values are available for integer-like args.

+ case 'X':

+ // Check that the argument has the expected type.

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT)

+ goto fail_to_expand;

+ i = arg.i_;

+ // The Arg() constructor automatically performed sign extension on

+ // signed parameters. This is great when outputting a %d decimal

+ // number, but can result in unexpected leading 0xFF bytes when

+ // outputting a %x hexadecimal number. Mask bits, if necessary.

+ if (arg.type_ == Arg::INT && arg.width_ < 8)

+ i &= ~(static_cast<int64_t>(-1) << (8*arg.width_));

Jeffrey Yasskin 2013/07/30 23:13:53 Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"

+ break;

+ default:

+ // Pointer values require an actual pointer or a string.

+ if (arg.type_ == Arg::POINTER)

+ i = reinterpret_cast<uintptr_t>(arg.ptr_);

+ else if (arg.type_ == Arg::STRING)

+ i = reinterpret_cast<uintptr_t>(arg.s_);

+ else

+ goto fail_to_expand;

+ // Pointers always include the "0x" prefix. This affects padding.

+ if (padding) {

+ if (pad == ' ') {

+ // Predict the number of hex digits (including "0x" prefix) that

+ // will be output for this address when it is converted to ASCII.

+ size_t chars = 2;

+ uint64_t j = i;

+ do {

+ ++chars;

+ j >>= 4;

+ } while (j);

+ // Output the necessary number of space characters to perform

+ // padding. We can't rely on IToASCII() to do that for us, as it

+ // would incorrectly add padding _after_ the "0x" prefix.

+ Pad(buf, sz, &count, pad, padding, chars, &ptr);

+ // Inform IToASCII() that it no longer needs to handle the padding.

+ padding = 0;

+ } else {

+ // Adjust for the two-character "0x" prefix.

+ padding = padding >= 2 ? padding - 2 : 0;

+ }

+ // Insert "0x" prefix, if there is still sufficient space in the

+ // output buffer.

+ if (Out(buf, sz, &count, '0'))

+ ++ptr;

+ if (Out(buf, sz, &count, 'x'))

+ ++ptr;

+ break;

+ }

+ // No matter what data type this value originated from, print it as

+ // a regular hexadecimal number.

+ IncrementCount(&count,

+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),

+ 16, padding, pad));

+ // Advance "ptr" to the end of the string that was just emitted.

+ if (sz - (ptr - buf))

+ while (*ptr)

+ ++ptr;

+ break; }

+ case 's': {

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args)

+ goto fail_to_expand;

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ if (arg.type_ != Arg::STRING)

+ goto fail_to_expand;

+ // Apply padding, if needed. This requires us to first check the

+ // length of the string that we are outputting.

+ if (padding) {

+ size_t len = 0;

+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; )

+ ++len;

+ Pad(buf, sz, &count, ' ', padding, len, &ptr);

+ }

+ // Printing a string involves nothing more than copying it into the

+ // output buffer and making sure we don't output more bytes than

+ // available space.

+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; )

+ if (Out(buf, sz, &count, *src++))

+ ++ptr;

+ break; }

+ case '%':

+ // Quoted percent '%' character.

+ goto copy_verbatim;

+ fail_to_expand:

+ // C++ gives us tools to do type checking -- something that snprintf()

+ // could never really do. So, whenever we see arguments that don't

+ // match up with the format string, we refuse to output them. But

+ // since we have to be extremely conservative about being async-

+ // signal-safe, we are limited in the type of error handling that we

Jeffrey Yasskin 2013/07/30 23:13:53 Is abort() or "*(volatile char*)0 = 0" ok for debu

+ // can do. So, all we do is pass the format string unchanged. That

+ // should eventually get the user's attention.

+ default:

+ // Unknown or unsupported format character. Just copy verbatim to

+ // output.

+ if (Out(buf, sz, &count, '%'))

+ ++ptr;

+ if (!ch)

+ goto end_of_format_string;

+ if (Out(buf, sz, &count, ch))

+ ++ptr;

+ break;

+ }

+ } else {

+ copy_verbatim:

+ if (Out(buf, sz, &count, fmt[-1]))

+ ++ptr;

+ }

+ end_of_format_string:

+ end_of_output_buffer:

+ *ptr = '\000';

+ IncrementCount(&count);

+ return static_cast<ssize_t>(count)-1;

+ssize_t FormatN(char* buf, size_t N, const char* fmt) {

+ // Make sure we can write at least one NUL byte.

+ ssize_t n = static_cast<ssize_t>(N);

+ if (n < 1)

+ return -1;

+ size_t count = 0;

+ // In the slow-path, we deal with errors by copying the contents of

+ // "fmt" unexpanded. This means, if there are no arguments passed, the

+ // Format() function always degenerates to a version of strncpy() that

+ // de-duplicates '%' characters.

+ char* dst = buf;

+ const char* src = fmt;

+ for (; *src; ++src) {

+ char ch = *src;

+ if (!IncrementCount(&count) && n > 1) {

+ --dst;

+ break;

+ }

+ if (n > 1) {

+ --n;

+ *dst++ = ch;

+ }

+ if (ch == '%' && src[1] == '%')

+ ++src;

+ }

+ IncrementCount(&count);

+ *dst = '\000';

+ return static_cast<ssize_t>(count)-1;

+} // namespace debug

+} // namespace base

« base/debug/format.h ('K') | « base/debug/format.h ('k') | base/debug/format_unittest.cc » ('j') | base/debug/format_unittest.cc » ('J')