Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Unified Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed reference value in unittest Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/debug/format.cc
diff --git a/base/debug/format.cc b/base/debug/format.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2582bbb61ab8072ae56115e3a98b5c7a3a651080
--- /dev/null
+++ b/base/debug/format.cc
@@ -0,0 +1,478 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <limits>
+
+#include "base/debug/format.h"
+
+
+namespace base {
+namespace debug {
+
+// The code in this file is extremely careful to be async-signal-safe.
+//
+// Most obviously, we avoid calling any code that could dynamically allocate
+// memory. Doing so would almost certainly result in bugs and dead-locks.
+// We also avoid calling any other STL functions that could have unintended
+// side-effects involving memory allocation or access to other shared
+// resources.
+//
+// But on top of that, we also avoid calling other library functions, as many
+// of them have the side-effect of calling getenv() (in order to deal with
+// localization) or accessing errno. The latter sounds benign, but there are
+// several execution contexts where it isn't even possible to safely read,
+// let alone write, errno.
+//
+// The stated design goal of the Format() function is that it can be called
+// from any context that can safely call C or C++ code (i.e. anything that
+// doesn't require assembly code).
+//
+// For a brief overview of some but not all of the issues with async-signal-
+// safety, refer to:
+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+
+namespace {
+
+inline bool IncrementCount(size_t* count, size_t inc = 1) {  // Adds "inc" to "*count", clamping at the largest ssize_t value; returns false when it clamps.
+ if (*count > std::numeric_limits<ssize_t>::max() - inc) {  // NOTE(review): an "inc" above the ssize_t maximum presumably wraps this subtraction and skips the clamp -- confirm.
Jeffrey Yasskin 2013/07/30 23:13:53 I think this will give the wrong answer if inc>std
+ *count = std::numeric_limits<ssize_t>::max();
+ return false;
+ } else {
+ *count += inc;  // For an in-range "inc", the check above keeps this sum at or below the ssize_t maximum.
+ return true;
+ }
+}
+
+inline bool Out(char* buf, size_t sz, size_t* count, char ch) {  // Stores "ch" at buf[*count] if it fits and always bumps "*count"; returns whether the byte was stored.
+ if (*count + 1 < sz) {  // Never touches the last byte of "buf"; presumably it is kept free for the terminating NUL.
+ buf[*count] = ch;
+ IncrementCount(count);
+ return true;
+ }
+ IncrementCount(count);  // Keep counting even when nothing is stored, so "*count" tracks the untruncated length.
+ return false;
+}
+
+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,  // Emits "pad" bytes until only "len" columns of a "padding"-wide field remain for the caller's own content.
+ size_t len, char** ptr) {  // "*ptr" is advanced once for every pad byte that was actually stored.
+ char *dst = *ptr;
+ for (; padding > len; --padding)
+ if (Out(buf, sz, count, pad))
+ ++dst;
+ else {  // Buffer is full: Out() already counted this byte, so add the remaining pad bytes to the count arithmetically and stop.
+ if (--padding)
+ IncrementCount(count, padding-len);
+ break;
+ }
+ *ptr = dst;
+}
+
+// POSIX doesn't define any async-signal-safe function for converting
+// an integer to ASCII, so we define our own version.
+//
+// This also lets us make the function a little more powerful: it deals with
+// padding and truncation, and it predicts the length of the untruncated
+// output.
+//
+// IToASCII() converts "i" to ASCII; "i" is treated as signed only if "sign"
+// is set. It never writes more than "sz" bytes. Output is truncated as needed
+// and NUL-terminated, unless "sz" is zero. It returns the number of non-NUL
+// bytes that would be output if no truncation had happened.
+//
+// It supports bases 2 through 16 ("sign" requires base 10; otherwise it emits
+// an empty string and returns 0). "pad" is expected to be '0' or ' '.
+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,
+ int base, size_t padding, char pad) {  // "upcase" selects A-F instead of a-f digits; "padding" is the minimum field width.
+ // Sanity check for the "base"; signed output is only supported in decimal.
+ if (base < 2 || base > 16 || (sign && base != 10)) {
+ if (static_cast<ssize_t>(sz) >= 1)
+ buf[0] = '\000';
+ return 0;
+ }
+
+ // Handle negative numbers, if requested by caller.
+ size_t count = 0;  // Length the output would have without truncation; this is the return value.
+ size_t n = 1;  // Bytes of "buf" claimed so far; the initial 1 appears to account for the terminating NUL.
+ char* start = buf;
+ int minint = 0;  // Set to 1 when the first emitted digit must be bumped to undo the "i + 1" adjustment below.
+ bool needs_minus = false;
+ uint64_t num;
+ if (sign && i < 0) {
+ // If we aren't inserting padding, or if we are padding with '0' zeros,
+ // we should insert the minus character now. It makes it easier to
+ // correctly deal with truncated padded numbers.
+ // On the other hand, if we are padding with ' ' spaces, we have to
+ // delay outputting the minus character until later.
+ if (padding <= 2 || pad == '0') {  // A width of 2 or less is already filled by '-' plus one digit, so no pad bytes can precede the sign.
+ ++count;
+
+ // Make sure we can write the '-' character.
+ if (++n > sz) {
+ if (sz > 0)
+ *start = '\000';
+ } else
+ *start++ = '-';
+
+ // Adjust padding, since we just output one character already.
+ if (padding)
+ --padding;
+ } else
+ needs_minus = true;
+
+ // Turn our number positive.
+ if (i == -i) {  // NOTE(review): negating the most negative int64_t overflows, which is undefined behavior for signed types.
Jeffrey Yasskin 2013/07/30 23:13:53 That's undefined behavior for signed numbers (sorr
+ // The lowest-most negative integer needs special treatment.
+ minint = 1;
+ num = -(i + 1);
+ } else {
+ // "Normal" negative numbers are easy.
+ num = -i;
+ }
+ } else
+ num = i;
+
+ // Loop until we have converted the entire number. Output at least one
+ // character (i.e. '0').
+ char* ptr = start;
+ bool started = false;
+ do {
+ // Sanity check. If padding is used to fill the entire address space,
+ // don't allow more than MAXINT bytes.
Jeffrey Yasskin 2013/07/30 23:13:53 MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA
+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {
+ break;
+ }
+
+ // Make sure there is still enough space left in our output buffer.
+ if (n == sz) {
+ if (ptr > start) {
+ // It is rare that we need to output a partial number. But if asked
+ // to do so, we will still make sure we output the correct number of
+ // leading digits.
+ // memmove(start, start+1, --ptr - start)
Jeffrey Yasskin 2013/07/30 23:13:53 This is weird. Why do you need to erase the first
+ --ptr;
+ for (char* move = start; move < ptr; ++move)
+ *move = move[1];
+ } else
+ goto cannot_write_anything_but_nul;
+ } else
+ ++n;
+
+ // Output the next digit and (if necessary) compensate for the most
Jeffrey Yasskin 2013/07/30 23:13:53 "lowest-most negative integer" should probably be
+ // negative integer needing special treatment. This works because,
+ // no matter the bit width of the integer, the most negative integer
+ // always ends in 2, 4, 6, or 8 in decimal, so adding one cannot carry.
+ if (n <= sz) {
+ if (!num && started)  // All digits are out already: emit the delayed '-' once, then pad bytes.
+ if (needs_minus) {
+ *ptr++ = '-';
+ needs_minus = false;
+ } else
+ *ptr++ = pad;
+ else {
+ started = true;
+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")
+ [num%base+minint];
+ }
+ }
+
+ cannot_write_anything_but_nul:
+ minint = 0;
+ num /= base;
+
+ // Add padding, if requested.
+ if (padding > 0) {
+ --padding;
+
+ // Performance optimization for when we are asked to output
+ // excessive padding, but our output buffer is limited in size.
+ // Even if we output a 128bit number in binary, we would never
+ // write more than 130 characters. So, anything beyond this limit
+ // and we can compute the result arithmetically.
+ if (count > n && count - n > 130) {
+ IncrementCount(&count, padding);
+ padding = 0;
+ }
+ }
+ } while (num || padding || needs_minus);
+
+ // Terminate the output with a NUL character.
+ if (sz > 0)
+ *ptr = '\000';
+
+ // Conversion to ASCII actually resulted in the digits being in reverse
+ // order. We can't easily generate them in forward order, as we can't tell
+ // the number of characters needed until we are done converting.
+ // So, now, we reverse the string (except for the possible '-' sign).
+ while (--ptr > start) {
+ char ch = *ptr;
+ *ptr = *start;
+ *start++ = ch;
+ }
+ return count;
+}
+
+} // anonymous namespace
+
+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,  // Expands "fmt" into "buf" (at most "sz" bytes including the NUL), consuming "args" in order.
+ const Arg* args, const size_t max_args) {  // Returns -1 if "sz" is unusable, otherwise the character count that was tallied, excluding the NUL.
+ // Make sure we can write at least one NUL byte.
+ if (static_cast<ssize_t>(sz) < 1)
+ return -1;
+
+ // Iterate over format string and interpret '%' arguments as they are
+ // encountered.
+ char* ptr = buf;
+ size_t padding;
+ char pad;
+ size_t count = 0;
+ for (unsigned int cur_arg = 0;
+ *fmt &&
+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {  // Stop early once the count has saturated.
+ if (*fmt++ == '%') {
+ padding = 0;
+ pad = ' ';
+ char ch = *fmt++;
+ format_character_found:
+ switch (ch) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ // Found a width parameter. Convert to an integer value and store in
+ // "padding". If the leading digit is a zero, change the padding
+ // character from a space ' ' to a zero '0'.
+ pad = ch == '0' ? '0' : ' ';
+ for (;;) {
+ const size_t max_padding = std::numeric_limits<ssize_t>::max();
+ if (padding > max_padding/10 ||
+ 10*padding > max_padding - (ch - '0')) {
+ // Integer overflow detected. Skip the rest of the width until
+ // we find the format character, then do the normal error handling.
+ while ((ch = *fmt++) >= '0' && ch <= '9') {
+ }
+ goto fail_to_expand;
+ }
+ padding = 10*padding + ch - '0';
+ ch = *fmt++;
+ if (ch < '0' || ch > '9') {
+ // Reached the end of the width parameter. This is where the format
+ // character is found.
+ goto format_character_found;
+ }
+ }
+ break;
+ case 'c': { // Output an ASCII character.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args)
+ goto fail_to_expand;
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT)
+ goto fail_to_expand;
+
+ // Apply padding, if needed.
+ Pad(buf, sz, &count, ' ', padding, 1, &ptr);  // Characters are always padded with spaces, whatever "pad" is.
+
+ // Convert the argument to an ASCII character and output it.
+ char ch = static_cast<char>(arg.i_);
+ if (!ch)  // A NUL character argument ends the output right here.
+ goto end_of_output_buffer;
+ if (Out(buf, sz, &count, ch))
+ ++ptr;
+ break; }
+ case 'd': { // Output a signed or unsigned integer-like value.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args)
+ goto fail_to_expand;
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT)
+ goto fail_to_expand;
+
+ // Our implementation of IToASCII() can handle all widths of data types
+ // and can print both signed and unsigned values.
+ IncrementCount(&count,
+ IToASCII(arg.type_ == Arg::INT, false, arg.i_,
+ ptr, sz - (ptr - buf), 10, padding, pad));
+
+ // Advance "ptr" to the end of the string that was just emitted.
+ if (sz - (ptr - buf))
+ while (*ptr)
+ ++ptr;
+ break; }
+ case 'x': // Output an unsigned hexadecimal value.
+ case 'X':
+ case 'p': { // Output a pointer value.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args)
+ goto fail_to_expand;
+
+ const Arg& arg = args[cur_arg++];
+ int64_t i;
+ switch (ch) {
+ case 'x': // Hexadecimal values are available for integer-like args.
+ case 'X':
+ // Check that the argument has the expected type.
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT)
+ goto fail_to_expand;
+ i = arg.i_;
+
+ // The Arg() constructor automatically performed sign expansion on
+ // signed parameters. This is great when outputting a %d decimal
+ // number, but can result in unexpected leading 0xFF bytes when
+ // outputting a %x hexadecimal number. Mask bits, if necessary.
+ if (arg.type_ == Arg::INT && arg.width_ < 8)
+ i &= ~(static_cast<int64_t>(-1) << (8*arg.width_));
Jeffrey Yasskin 2013/07/30 23:13:53 Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"
+ break;
+ default:
+ // Pointer values require an actual pointer or a string.
+ if (arg.type_ == Arg::POINTER)
+ i = reinterpret_cast<uintptr_t>(arg.ptr_);
+ else if (arg.type_ == Arg::STRING)
+ i = reinterpret_cast<uintptr_t>(arg.s_);
+ else
+ goto fail_to_expand;
+
+ // Pointers always include the "0x" prefix. This affects padding.
+ if (padding) {
+ if (pad == ' ') {
+ // Predict the number of hex digits (including "0x" prefix) that
+ // will be output for this address when it is converted to ASCII.
+ size_t chars = 2;
+ uint64_t j = i;
+ do {
+ ++chars;
+ j >>= 4;
+ } while (j);
+
+ // Output the necessary number of space characters to perform
+ // padding. We can't rely on IToASCII() to do that for us, as it
+ // would incorrectly add padding _after_ the "0x" prefix.
+ Pad(buf, sz, &count, pad, padding, chars, &ptr);
+
+ // Inform IToASCII() that it no longer needs to handle the padding.
+ padding = 0;
+ } else {
+ // Adjust for the two-character "0x" prefix.
+ padding = padding >= 2 ? padding - 2 : 0;
+ }
+ }
+
+ // Insert "0x" prefix, if there is still sufficient space in the
+ // output buffer.
+ if (Out(buf, sz, &count, '0'))
+ ++ptr;
+ if (Out(buf, sz, &count, 'x'))
+ ++ptr;
+ break;
+ }
+
+ // No matter what data type this value originated from, print it as
+ // a regular hexadecimal number.
+ IncrementCount(&count,
+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),  // Both %X and %p come out with upper-case digits.
+ 16, padding, pad));
+
+ // Advance "ptr" to the end of the string that was just emitted.
+ if (sz - (ptr - buf))
+ while (*ptr)
+ ++ptr;
+ break; }
+ case 's': {
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args)
+ goto fail_to_expand;
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ if (arg.type_ != Arg::STRING)
+ goto fail_to_expand;
+
+ // Apply padding, if needed. This requires us to first check the
+ // length of the string that we are outputting.
+ if (padding) {
+ size_t len = 0;
+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; )
+ ++len;
+ Pad(buf, sz, &count, ' ', padding, len, &ptr);  // Strings are always padded with spaces, whatever "pad" is.
+ }
+
+ // Printing a string involves nothing more than copying it into the
+ // output buffer and making sure we don't output more bytes than
+ // available space.
+ for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; )
+ if (Out(buf, sz, &count, *src++))
+ ++ptr;
+ break; }
+ case '%':
+ // Quoted percent '%' character.
+ goto copy_verbatim;
+ fail_to_expand:
+ // C++ gives us tools to do type checking -- something that snprintf()
+ // could never really do. So, whenever we see arguments that don't
+ // match up with the format string, we refuse to output them. But
+ // since we have to be extremely conservative about being async-
+ // signal-safe, we are limited in the type of error handling that we
Jeffrey Yasskin 2013/07/30 23:13:53 Is abort() or "*(volatile char*)0 = 0" ok for debu
+ // can do. So, all we do is pass the format string unchanged. That
+ // should eventually get the user's attention.
+ default:
+ // Unknown or unsupported format character. Just copy verbatim to
+ // output.
+ if (Out(buf, sz, &count, '%'))
+ ++ptr;
+ if (!ch)  // The format string ended right after the '%'.
+ goto end_of_format_string;
+ if (Out(buf, sz, &count, ch))
+ ++ptr;
+ break;
+ }
+ } else {
+ copy_verbatim:
+ if (Out(buf, sz, &count, fmt[-1]))
+ ++ptr;
+ }
+ }
+ end_of_format_string:
+ end_of_output_buffer:
+ *ptr = '\000';
+ IncrementCount(&count);  // Count the NUL as well, so the subtraction below yields the length without it.
+ return static_cast<ssize_t>(count)-1;
+}
+
+ssize_t FormatN(char* buf, size_t N, const char* fmt) {  // Argument-free variant: copies "fmt" into "buf", collapsing "%%" to "%"; returns the untruncated length, or -1 if "N" is unusable.
+ // Make sure we can write at least one NUL byte.
+ ssize_t n = static_cast<ssize_t>(N);
+ if (n < 1)
+ return -1;
+ size_t count = 0;
+
+ // In the slow-path, we deal with errors by copying the contents of
+ // "fmt" unexpanded. This means, if there are no arguments passed, the
+ // Format() function always degenerates to a version of strncpy() that
+ // de-duplicates '%' characters.
+ char* dst = buf;
+ const char* src = fmt;
+ for (; *src; ++src) {
+ char ch = *src;
+ if (!IncrementCount(&count) && n > 1) {  // NOTE(review): taken only once the count has saturated; why "dst" steps back one byte is not evident here -- confirm.
+ --dst;
+ break;
+ }
+ if (n > 1) {  // Store the byte only while there is still room left for the terminating NUL.
+ --n;
+ *dst++ = ch;
+ }
+ if (ch == '%' && src[1] == '%')  // Skip the second '%' of an escaped pair.
+ ++src;
+ }
+ IncrementCount(&count);  // Count the NUL as well, so the subtraction below yields the length without it.
+ *dst = '\000';
+ return static_cast<ssize_t>(count)-1;
+}
+
+} // namespace debug
+} // namespace base

Powered by Google App Engine
This is Rietveld 408576698