base/debug/format.cc - Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion

Side by Side Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed reference value in unittest Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include <limits>

	6

	7 #include "base/debug/format.h"

	8

	9

	10 namespace base {

	11 namespace debug {

	12

	13 // The code in this file is extremely careful to be async-signal-safe.

	14 //

	15 // Most obviously, we avoid calling any code that could dynamically allocate

	16 // memory. Doing so would almost certainly result in bugs and deadlocks.

	17 // We also avoid calling any other STL functions that could have unintended

	18 // side-effects involving memory allocation or access to other shared

	19 // resources.

	20 //

	21 // But on top of that, we also avoid calling other library functions, as many

	22 // of them have the side-effect of calling getenv() (in order to deal with

	23 // localization) or accessing errno. The latter sounds benign, but there are

	24 // several execution contexts where it isn't even possible to safely read let

	25 // alone write errno.

	26 //

	27 // The stated design goal of the Format() function is that it can be called

	28 // from any context that can safely call C or C++ code (i.e. anything that

	29 // doesn't require assembly code).

	30 //

	31 // For a brief overview of some but not all of the issues with async-signal-

	32 // safety, refer to:

	33 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html

	34

	35 namespace {

	36

	37 inline bool IncrementCount(size_t* count, size_t inc = 1) {

	38 if (*count > std::numeric_limits<ssize_t>::max() - inc) {
	Jeffrey Yasskin 2013/07/30 23:13:53 I think this will give the wrong answer if inc>std I think this will give the wrong answer if inc>std::numeric_limits<ssize_t>::max().
	39 *count = std::numeric_limits<ssize_t>::max();

	40 return false;

	41 } else {

	42 *count += inc;

	43 return true;

	44 }

	45 }

	46

	47 inline bool Out(char* buf, size_t sz, size_t* count, char ch) {

	48 if (*count + 1 < sz) {

	49 buf[*count] = ch;

	50 IncrementCount(count);

	51 return true;

	52 }

	53 IncrementCount(count);

	54 return false;

	55 }

	56

	57 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,

	58 size_t len, char** ptr) {

	59 char dst = ptr;

	60 for (; padding > len; --padding)

	61 if (Out(buf, sz, count, pad))

	62 ++dst;

	63 else {

	64 if (--padding)

	65 IncrementCount(count, padding-len);

	66 break;

	67 }

	68 *ptr = dst;

	69 }

	70

	71 // POSIX doesn't define any async-signal safe function for converting

	72 // an integer to ASCII. Define our own version.

	73 //

	74 // This also gives us the ability to make the function a little more powerful

	75 // and have it deal with padding, with truncation, and with predicting the

	76 // length of the untruncated output.

	77 //

	78 // IToASCII() converts an (optionally signed) integer to ASCII. It never

	79 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL

	80 // character is appended, unless "sz" is zero. It returns the number of non-NUL

	81 // bytes that would be output if no truncation had happened.

	82 //

	83 // It supports bases 2 through 16. Padding can be done with either '0' zeros

	84 // or ' ' spaces.

	85 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,

	86 int base, size_t padding, char pad) {

	87 // Sanity check for the "base".

	88 if (base < 2 \|\| base > 16 \|\| (sign && base != 10)) {

	89 if (static_cast<ssize_t>(sz) >= 1)

	90 buf[0] = '\000';

	91 return 0;

	92 }

	93

	94 // Handle negative numbers, if requested by caller.

	95 size_t count = 0;

	96 size_t n = 1;

	97 char* start = buf;

	98 int minint = 0;

	99 bool needs_minus = false;

	100 uint64_t num;

	101 if (sign && i < 0) {

	102 // If we aren't inserting padding, or if we are padding with '0' zeros,

	103 // we should insert the minus character now. It makes it easier to

	104 // correctly deal with truncated padded numbers.

	105 // On the other hand, if we are padding with ' ' spaces, we have to

	106 // delay outputting the minus character until later.

	107 if (padding <= 2 \|\| pad == '0') {

	108 ++count;

	109

	110 // Make sure we can write the '-' character.

	111 if (++n > sz) {

	112 if (sz > 0)

	113 *start = '\000';

	114 } else

	115 *start++ = '-';

	116

	117 // Adjust padding, since we just output one character already.

	118 if (padding)

	119 --padding;

	120 } else

	121 needs_minus = true;

	122

	123 // Turn our number positive.

	124 if (i == -i) {
	Jeffrey Yasskin 2013/07/30 23:13:53 That's undefined behavior for signed numbers (sorr That's undefined behavior for signed numbers (sorry); compute the negation as unsigned instead or use numeric_limits<int64_t>::min().
	125 // The lowest-most negative integer needs special treatment.

	126 minint = 1;

	127 num = -(i + 1);

	128 } else {

	129 // "Normal" negative numbers are easy.

	130 num = -i;

	131 }

	132 } else

	133 num = i;

	134

	135 // Loop until we have converted the entire number. Output at least one

	136 // character (i.e. '0').

	137 char* ptr = start;

	138 bool started = false;

	139 do {

	140 // Sanity check. If padding is used to fill the entire address space,

	141 // don't allow more than MAXINT bytes.
	Jeffrey Yasskin 2013/07/30 23:13:53 MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MAX instead? (Here and in the header comment.)
	142 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {

	143 break;

	144 }

	145

	146 // Make sure there is still enough space left in our output buffer.

	147 if (n == sz) {

	148 if (ptr > start) {

	149 // It is rare that we need to output a partial number. But if asked

	150 // to do so, we will still make sure we output the correct number of

	151 // leading digits.

	152 // memmove(start, start+1, --ptr - start)
	Jeffrey Yasskin 2013/07/30 23:13:53 This is weird. Why do you need to erase the first This is weird. Why do you need to erase the first digit you printed?
	153 --ptr;

	154 for (char* move = start; move < ptr; ++move)

	155 *move = move[1];

	156 } else

	157 goto cannot_write_anything_but_nul;

	158 } else

	159 ++n;

	160

	161 // Output the next digit and (if necessary) compensate for the lowest-
	Jeffrey Yasskin 2013/07/30 23:13:53 "lowest-most negative integer" should probably be "lowest-most negative integer" should probably be "lowest integer" or "most-negative integer".
	162 // most negative integer needing special treatment. This works because,

	163 // no matter the bit width of the integer, the lowest-most decimal

	164 // integer always ends in 2, 4, 6, or 8.

	165 if (n <= sz) {

	166 if (!num && started)

	167 if (needs_minus) {

	168 *ptr++ = '-';

	169 needs_minus = false;

	170 } else

	171 *ptr++ = pad;

	172 else {

	173 started = true;

	174 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")

	175 [num%base+minint];

	176 }

	177 }

	178

	179 cannot_write_anything_but_nul:

	180 minint = 0;

	181 num /= base;

	182

	183 // Add padding, if requested.

	184 if (padding > 0) {

	185 --padding;

	186

	187 // Performance optimization for when we are asked to output

	188 // excessive padding, but our output buffer is limited in size.

	189 // Even if we output a 128bit number in binary, we would never

	190 // write more than 130 characters. So, anything beyond this limit

	191 // and we can compute the result arithmetically.

	192 if (count > n && count - n > 130) {

	193 IncrementCount(&count, padding);

	194 padding = 0;

	195 }

	196 }

	197 } while (num \|\| padding \|\| needs_minus);

	198

	199 // Terminate the output with a NUL character.

	200 if (sz > 0)

	201 *ptr = '\000';

	202

	203 // Conversion to ASCII actually resulted in the digits being in reverse

	204 // order. We can't easily generate them in forward order, as we can't tell

	205 // the number of characters needed until we are done converting.

	206 // So, now, we reverse the string (except for the possible '-' sign).

	207 while (--ptr > start) {

	208 char ch = *ptr;

	209 ptr = start;

	210 *start++ = ch;

	211 }

	212 return count;

	213 }

	214

	215 } // anonymous namespace

	216

	217 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,

	218 const Arg* args, const size_t max_args) {

	219 // Make sure we can write at least one NUL byte.

	220 if (static_cast<ssize_t>(sz) < 1)

	221 return -1;

	222

	223 // Iterate over format string and interpret '%' arguments as they are

	224 // encountered.

	225 char* ptr = buf;

	226 size_t padding;

	227 char pad;

	228 size_t count = 0;

	229 for (unsigned int cur_arg = 0;

	230 *fmt &&

	231 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {

	232 if (*fmt++ == '%') {

	233 padding = 0;

	234 pad = ' ';

	235 char ch = *fmt++;

	236 format_character_found:

	237 switch (ch) {

	238 case '0': case '1': case '2': case '3': case '4':

	239 case '5': case '6': case '7': case '8': case '9':

	240 // Found a width parameter. Convert to an integer value and store in

	241 // "padding". If the leading digit is a zero, change the padding

	242 // character from a space ' ' to a zero '0'.

	243 pad = ch == '0' ? '0' : ' ';

	244 for (;;) {

	245 const size_t max_padding = std::numeric_limits<ssize_t>::max();

	246 if (padding > max_padding/10 \|\|

	247 10*padding > max_padding - (ch - '0')) {

	248 // Integer overflow detected. Skip the rest of the width until

	249 // we find the format character, then do the normal error handling.

	250 while ((ch = *fmt++) >= '0' && ch <= '9') {

	251 }

	252 goto fail_to_expand;

	253 }

	254 padding = 10*padding + ch - '0';

	255 ch = *fmt++;

	256 if (ch < '0' \|\| ch > '9') {

	257 // Reached the end of the width parameter. This is where the format

	258 // character is found.

	259 goto format_character_found;

	260 }

	261 }

	262 break;

	263 case 'c': { // Output an ASCII character.

	264 // Check that there are arguments left to be inserted.

	265 if (cur_arg >= max_args)

	266 goto fail_to_expand;

	267

	268 // Check that the argument has the expected type.

	269 const Arg& arg = args[cur_arg++];

	270 if (arg.type_ != Arg::INT &&

	271 arg.type_ != Arg::UINT)

	272 goto fail_to_expand;

	273

	274 // Apply padding, if needed.

	275 Pad(buf, sz, &count, ' ', padding, 1, &ptr);

	276

	277 // Convert the argument to an ASCII character and output it.

	278 char ch = static_cast<char>(arg.i_);

	279 if (!ch)

	280 goto end_of_output_buffer;

	281 if (Out(buf, sz, &count, ch))

	282 ++ptr;

	283 break; }

	284 case 'd': { // Output a signed or unsigned integer-like value.

	285 // Check that there are arguments left to be inserted.

	286 if (cur_arg >= max_args)

	287 goto fail_to_expand;

	288

	289 // Check that the argument has the expected type.

	290 const Arg& arg = args[cur_arg++];

	291 if (arg.type_ != Arg::INT &&

	292 arg.type_ != Arg::UINT)

	293 goto fail_to_expand;

	294

	295 // Our implementation of IToASCII() can handle all widths of data types

	296 // and can print both signed and unsigned values.

	297 IncrementCount(&count,

	298 IToASCII(arg.type_ == Arg::INT, false, arg.i_,

	299 ptr, sz - (ptr - buf), 10, padding, pad));

	300

	301 // Advance "ptr" to the end of the string that was just emitted.

	302 if (sz - (ptr - buf))

	303 while (*ptr)

	304 ++ptr;

	305 break; }

	306 case 'x': // Output an unsigned hexadecimal value.

	307 case 'X':

	308 case 'p': { // Output a pointer value.

	309 // Check that there are arguments left to be inserted.

	310 if (cur_arg >= max_args)

	311 goto fail_to_expand;

	312

	313 const Arg& arg = args[cur_arg++];

	314 int64_t i;

	315 switch (ch) {

	316 case 'x': // Hexadecimal values are available for integer-like args.

	317 case 'X':

	318 // Check that the argument has the expected type.

	319 if (arg.type_ != Arg::INT &&

	320 arg.type_ != Arg::UINT)

	321 goto fail_to_expand;

	322 i = arg.i_;

	323

	324 // The Arg() constructor automatically performed sign expansion on

	325 // signed parameters. This is great when outputting a %d decimal

	326 // number, but can result in unexpected leading 0xFF bytes when

	327 // outputting a %c hexadecimal number. Mask bits, if necessary.

	328 if (arg.type_ == Arg::INT && arg.width_ < 8)

	329 i &= ~(static_cast<int64_t>(-1) << (8*arg.width_));
	Jeffrey Yasskin 2013/07/30 23:13:53 Heh, alternately, "i &= (1LL << 8arg.width_) - 1" Heh, alternately, "i &= (1LL << 8arg.width_) - 1". You could handle this in the Arg() constructor by converting unsigned arguments to uint64_t before storing them in the int64_t.
	330 break;

	331 default:

	332 // Pointer values require an actual pointer or a string.

	333 if (arg.type_ == Arg::POINTER)

	334 i = reinterpret_cast<uintptr_t>(arg.ptr_);

	335 else if (arg.type_ == Arg::STRING)

	336 i = reinterpret_cast<uintptr_t>(arg.s_);

	337 else

	338 goto fail_to_expand;

	339

	340 // Pointers always include the "0x" prefix. This affects padding.

	341 if (padding) {

	342 if (pad == ' ') {

	343 // Predict the number of hex digits (including "0x" prefix) that

	344 // will be output for this address when it is converted to ASCII.

	345 size_t chars = 2;

	346 uint64_t j = i;

	347 do {

	348 ++chars;

	349 j >>= 4;

	350 } while (j);

	351

	352 // Output the necessary number of space characters to perform

	353 // padding. We can't rely on IToASCII() to do that for us, as it

	354 // would incorrectly add padding _after_ the "0x" prefix.

	355 Pad(buf, sz, &count, pad, padding, chars, &ptr);

	356

	357 // Inform itoa_r() that it no longer needs to handle the padding.

	358 padding = 0;

	359 } else {

	360 // Adjust for the two-character "0x" prefix.

	361 padding = padding >= 2 ? padding - 2 : 0;

	362 }

	363 }

	364

	365 // Insert "0x" prefix, if there is still sufficient space in the

	366 // output buffer.

	367 if (Out(buf, sz, &count, '0'))

	368 ++ptr;

	369 if (Out(buf, sz, &count, 'x'))

	370 ++ptr;

	371 break;

	372 }

	373

	374 // No matter what data type this value originated from, print it as

	375 // a regular hexadecimal number.

	376 IncrementCount(&count,

	377 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),

	378 16, padding, pad));

	379

	380 // Advance "ptr" to the end of the string that was just emitted.

	381 if (sz - (ptr - buf))

	382 while (*ptr)

	383 ++ptr;

	384 break; }

	385 case 's': {

	386 // Check that there are arguments left to be inserted.

	387 if (cur_arg >= max_args)

	388 goto fail_to_expand;

	389

	390 // Check that the argument has the expected type.

	391 const Arg& arg = args[cur_arg++];

	392 if (arg.type_ != Arg::STRING)

	393 goto fail_to_expand;

	394

	395 // Apply padding, if needed. This requires us to first check the

	396 // length of the string that we are outputting.

	397 if (padding) {

	398 size_t len = 0;

	399 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; )

	400 ++len;

	401 Pad(buf, sz, &count, ' ', padding, len, &ptr);

	402 }

	403

	404 // Printing a string involves nothing more than copying it into the

	405 // output buffer and making sure we don't output more bytes than

	406 // available space.

	407 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; )

	408 if (Out(buf, sz, &count, *src++))

	409 ++ptr;

	410 break; }

	411 case '%':

	412 // Quoted percent '%' character.

	413 goto copy_verbatim;

	414 fail_to_expand:

	415 // C++ gives us tools to do type checking -- something that snprintf()

	416 // could never really do. So, whenever we see arguments that don't

	417 // match up with the format string, we refuse to output them. But

	418 // since we have to be extremely conservative about being async-

	419 // signal-safe, we are limited in the type of error handling that we
	Jeffrey Yasskin 2013/07/30 23:13:53 Is abort() or "(volatile char)0 = 0" ok for debu Is abort() or "(volatile char)0 = 0" ok for debug mode?
	420 // can do. So, all we do is pass the format string unchanged. That

	421 // should eventually get the user's attention.

	422 default:

	423 // Unknown or unsupported format character. Just copy verbatim to

	424 // output.

	425 if (Out(buf, sz, &count, '%'))

	426 ++ptr;

	427 if (!ch)

	428 goto end_of_format_string;

	429 if (Out(buf, sz, &count, ch))

	430 ++ptr;

	431 break;

	432 }

	433 } else {

	434 copy_verbatim:

	435 if (Out(buf, sz, &count, fmt[-1]))

	436 ++ptr;

	437 }

	438 }

	439 end_of_format_string:

	440 end_of_output_buffer:

	441 *ptr = '\000';

	442 IncrementCount(&count);

	443 return static_cast<ssize_t>(count)-1;

	444 }

	445

	446 ssize_t FormatN(char* buf, size_t N, const char* fmt) {

	447 // Make sure we can write at least one NUL byte.

	448 ssize_t n = static_cast<ssize_t>(N);

	449 if (n < 1)

	450 return -1;

	451 size_t count = 0;

	452

	453 // In the slow-path, we deal with errors by copying the contents of

	454 // "fmt" unexpanded. This means, if there are no arguments passed, the

	455 // Format() function always degenerates to version of strncpy() that

	456 // de-duplicates '%' characters.

	457 char* dst = buf;

	458 const char* src = fmt;

	459 for (; *src; ++src) {

	460 char ch = *src;

	461 if (!IncrementCount(&count) && n > 1) {

	462 --dst;

	463 break;

	464 }

	465 if (n > 1) {

	466 --n;

	467 *dst++ = ch;

	468 }

	469 if (ch == '%' && src[1] == '%')

	470 ++src;

	471 }

	472 IncrementCount(&count);

	473 *dst = '\000';

	474 return static_cast<ssize_t>(count)-1;

	475 }

	476

	477 } // namespace debug

	478 } // namespace base

OLD	NEW

« base/debug/format.h ('K') | « base/debug/format.h ('k') | base/debug/format_unittest.cc » ('j') | base/debug/format_unittest.cc » ('J')