Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(389)

Side by Side Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed reference value in unittest Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <limits>
6
7 #include "base/debug/format.h"
8
9
10 namespace base {
11 namespace debug {
12
13 // The code in this file is extremely careful to be async-signal-safe.
14 //
15 // Most obviously, we avoid calling any code that could dynamically allocate
16 // memory. Doing so would almost certainly result in bugs and dead-locks.
17 // We also avoid calling any other STL functions that could have unintended
18 // side-effects involving memory allocation or access to other shared
19 // resources.
20 //
21 // But on top of that, we also avoid calling other library functions, as many
22 // of them have the side-effect of calling getenv() (in order to deal with
23 // localization) or accessing errno. The latter sounds benign, but there are
24 // several execution contexts where it isn't even possible to safely read let
25 // alone write errno.
26 //
27 // The stated design goal of the Format() function is that it can be called
28 // from any context that can safely call C or C++ code (i.e. anything that
29 // doesn't require assembly code).
30 //
31 // For a brief overview of some but not all of the issues with async-signal-
32 // safety, refer to:
33 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
34
namespace {

// Adds "inc" to "*count", saturating at SSIZE_MAX.
//
// Returns true if the addition was performed exactly, and false if the
// result had to be clamped to SSIZE_MAX. All arithmetic is done on size_t;
// "inc" is checked against the limit first, because "limit - inc" would
// otherwise wrap around for increments larger than SSIZE_MAX and the
// saturation check would silently be skipped.
inline bool IncrementCount(size_t* count, size_t inc = 1) {
  const size_t limit =
      static_cast<size_t>(std::numeric_limits<ssize_t>::max());
  if (inc > limit || *count > limit - inc) {
    *count = limit;
    return false;
  }
  *count += inc;
  return true;
}

// Emits a single character at position "*count" of "buf", provided that
// doing so still leaves room for a trailing NUL in a buffer of "sz" bytes.
// "*count" is incremented regardless of whether the character was stored,
// so it tracks the length of the untruncated output.
//
// Returns true if the character was actually stored in the buffer.
inline bool Out(char* buf, size_t sz, size_t* count, char ch) {
  const bool fits = *count + 1 < sz;
  if (fits)
    buf[*count] = ch;
  IncrementCount(count);
  return fits;
}

// Emits "padding - len" copies of "pad" (nothing if padding <= len), i.e.
// the fill characters needed to extend an item of "len" characters to a
// field width of "padding".
//
// "*ptr" is advanced by one for every character that was actually stored.
// Once the buffer is full, the remaining fill characters are accounted for
// arithmetically in "*count" instead of being emitted one by one.
inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,
                size_t len, char** ptr) {
  char* dst = *ptr;
  for (; padding > len; --padding) {
    if (Out(buf, sz, count, pad)) {
      ++dst;
    } else {
      // Out() already counted the character that did not fit. Whatever is
      // left after that one is "padding - 1 - len".
      if (--padding)
        IncrementCount(count, padding - len);
      break;
    }
  }
  *ptr = dst;
}

// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. Define our own version.
//
// This also gives us the ability to make the function a little more powerful
// and have it deal with padding, with truncation, and with predicting the
// length of the untruncated output.
//
// IToASCII() converts an (optionally signed) integer to ASCII. It never
// writes more than "sz" bytes. Output will be truncated as needed, and a NUL
// character is appended, unless "sz" is zero. It returns the number of non-NUL
// bytes that would be output if no truncation had happened.
//
// It supports bases 2 through 16 (signed conversion only for base 10).
// Padding can be done with either '0' zeros or ' ' spaces. For an invalid
// base, an empty string is produced and 0 is returned.
size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,
                int base, size_t padding, char pad) {
  // Sanity check for the "base".
  if (base < 2 || base > 16 || (sign && base != 10)) {
    if (static_cast<ssize_t>(sz) >= 1)
      buf[0] = '\000';
    return 0;
  }

  // "count" is the length of the untruncated output; "n" is the number of
  // buffer bytes claimed so far, including the byte reserved for the NUL.
  size_t count = 0;
  size_t n = 1;
  char* start = buf;
  bool needs_minus = false;
  uint64_t num;

  // Handle negative numbers, if requested by caller.
  if (sign && i < 0) {
    // If we aren't inserting padding, or if we are padding with '0' zeros,
    // we should insert the minus character now. It makes it easier to
    // correctly deal with truncated padded numbers.
    // On the other hand, if we are padding with ' ' spaces, we have to
    // delay outputting the minus character until later.
    if (padding <= 2 || pad == '0') {
      ++count;

      // Make sure we can write the '-' character.
      if (++n > sz) {
        if (sz > 0)
          *start = '\000';
      } else {
        *start++ = '-';
      }

      // Adjust padding, since we just output one character already.
      if (padding)
        --padding;
    } else {
      needs_minus = true;
    }

    // Turn our number positive. The negation is performed on the unsigned
    // representation: negating the most negative int64_t as a signed value
    // would overflow, which is undefined behavior. Unsigned arithmetic is
    // modular and yields the correct magnitude for every negative input.
    num = 0 - static_cast<uint64_t>(i);
  } else {
    num = static_cast<uint64_t>(i);
  }

  const char* const digits =
      upcase ? "0123456789ABCDEF" : "0123456789abcdef";

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0'). Digits are generated least-significant first and
  // are reversed once the conversion is complete.
  char* ptr = start;
  bool started = false;
  do {
    // Sanity check. If padding is used to fill the entire address space,
    // don't report more than SSIZE_MAX bytes.
    if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {
      break;
    }

    // Make sure there is still enough space left in our output buffer.
    bool writable = true;
    if (n == sz) {
      if (ptr > start) {
        // It is rare that we need to output a partial number. But if asked
        // to do so, we will still make sure we output the correct number of
        // leading digits. As the buffer holds the digits in reverse order,
        // that means discarding the oldest (least significant) character at
        // "start" and shifting everything else down by one, which frees up
        // one slot for the next, more significant, character.
        // This is equivalent to memmove(start, start+1, --ptr - start).
        --ptr;
        for (char* move = start; move < ptr; ++move)
          *move = move[1];
      } else {
        // There is no room for anything other than the NUL terminator.
        writable = false;
      }
    } else {
      ++n;
    }

    // Output the next character: a digit while the number is not exhausted
    // (or if nothing has been output yet), otherwise the delayed minus sign
    // or a padding character.
    if (writable && n <= sz) {
      if (!num && started) {
        if (needs_minus) {
          *ptr++ = '-';
          needs_minus = false;
        } else {
          *ptr++ = pad;
        }
      } else {
        started = true;
        *ptr++ = digits[num % base];
      }
    }

    num /= base;

    // Add padding, if requested.
    if (padding > 0) {
      --padding;

      // Performance optimization for when we are asked to output
      // excessive padding, but our output buffer is limited in size.
      // Even if we output a 128bit number in binary, we would never
      // write more than 130 characters. So, anything beyond this limit
      // and we can compute the result arithmetically.
      if (count > n && count - n > 130) {
        IncrementCount(&count, padding);
        padding = 0;
      }
    }
  } while (num || padding || needs_minus);

  // Terminate the output with a NUL character.
  if (sz > 0)
    *ptr = '\000';

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible '-' sign).
  // The guard avoids forming a pointer in front of the buffer when nothing
  // was written at all.
  if (ptr > start) {
    for (char* tail = ptr - 1; tail > start; --tail, ++start) {
      const char ch = *tail;
      *tail = *start;
      *start = ch;
    }
  }
  return count;
}

}  // anonymous namespace
216
217 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,
218 const Arg* args, const size_t max_args) {
219 // Make sure we can write at least one NUL byte.
220 if (static_cast<ssize_t>(sz) < 1)
221 return -1;
222
223 // Iterate over format string and interpret '%' arguments as they are
224 // encountered.
225 char* ptr = buf;
226 size_t padding;
227 char pad;
228 size_t count = 0;
229 for (unsigned int cur_arg = 0;
230 *fmt &&
231 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {
232 if (*fmt++ == '%') {
233 padding = 0;
234 pad = ' ';
235 char ch = *fmt++;
236 format_character_found:
237 switch (ch) {
238 case '0': case '1': case '2': case '3': case '4':
239 case '5': case '6': case '7': case '8': case '9':
240 // Found a width parameter. Convert to an integer value and store in
241 // "padding". If the leading digit is a zero, change the padding
242 // character from a space ' ' to a zero '0'.
243 pad = ch == '0' ? '0' : ' ';
244 for (;;) {
245 const size_t max_padding = std::numeric_limits<ssize_t>::max();
246 if (padding > max_padding/10 ||
247 10*padding > max_padding - (ch - '0')) {
248 // Integer overflow detected. Skip the rest of the width until
249 // we find the format character, then do the normal error handling.
250 while ((ch = *fmt++) >= '0' && ch <= '9') {
251 }
252 goto fail_to_expand;
253 }
254 padding = 10*padding + ch - '0';
255 ch = *fmt++;
256 if (ch < '0' || ch > '9') {
257 // Reached the end of the width parameter. This is where the format
258 // character is found.
259 goto format_character_found;
260 }
261 }
262 break;
263 case 'c': { // Output an ASCII character.
264 // Check that there are arguments left to be inserted.
265 if (cur_arg >= max_args)
266 goto fail_to_expand;
267
268 // Check that the argument has the expected type.
269 const Arg& arg = args[cur_arg++];
270 if (arg.type_ != Arg::INT &&
271 arg.type_ != Arg::UINT)
272 goto fail_to_expand;
273
274 // Apply padding, if needed.
275 Pad(buf, sz, &count, ' ', padding, 1, &ptr);
276
277 // Convert the argument to an ASCII character and output it.
278 char ch = static_cast<char>(arg.i_);
279 if (!ch)
280 goto end_of_output_buffer;
281 if (Out(buf, sz, &count, ch))
282 ++ptr;
283 break; }
284 case 'd': { // Output a signed or unsigned integer-like value.
285 // Check that there are arguments left to be inserted.
286 if (cur_arg >= max_args)
287 goto fail_to_expand;
288
289 // Check that the argument has the expected type.
290 const Arg& arg = args[cur_arg++];
291 if (arg.type_ != Arg::INT &&
292 arg.type_ != Arg::UINT)
293 goto fail_to_expand;
294
295 // Our implementation of IToASCII() can handle all widths of data types
296 // and can print both signed and unsigned values.
297 IncrementCount(&count,
298 IToASCII(arg.type_ == Arg::INT, false, arg.i_,
299 ptr, sz - (ptr - buf), 10, padding, pad));
300
301 // Advance "ptr" to the end of the string that was just emitted.
302 if (sz - (ptr - buf))
303 while (*ptr)
304 ++ptr;
305 break; }
306 case 'x': // Output an unsigned hexadecimal value.
307 case 'X':
308 case 'p': { // Output a pointer value.
309 // Check that there are arguments left to be inserted.
310 if (cur_arg >= max_args)
311 goto fail_to_expand;
312
313 const Arg& arg = args[cur_arg++];
314 int64_t i;
315 switch (ch) {
316 case 'x': // Hexadecimal values are available for integer-like args.
317 case 'X':
318 // Check that the argument has the expected type.
319 if (arg.type_ != Arg::INT &&
320 arg.type_ != Arg::UINT)
321 goto fail_to_expand;
322 i = arg.i_;
323
324 // The Arg() constructor automatically performed sign expansion on
325 // signed parameters. This is great when outputting a %d decimal
326 // number, but can result in unexpected leading 0xFF bytes when
327 // outputting a %c hexadecimal number. Mask bits, if necessary.
328 if (arg.type_ == Arg::INT && arg.width_ < 8)
329 i &= ~(static_cast<int64_t>(-1) << (8*arg.width_));
Jeffrey Yasskin 2013/07/30 23:13:53 Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"
330 break;
331 default:
332 // Pointer values require an actual pointer or a string.
333 if (arg.type_ == Arg::POINTER)
334 i = reinterpret_cast<uintptr_t>(arg.ptr_);
335 else if (arg.type_ == Arg::STRING)
336 i = reinterpret_cast<uintptr_t>(arg.s_);
337 else
338 goto fail_to_expand;
339
340 // Pointers always include the "0x" prefix. This affects padding.
341 if (padding) {
342 if (pad == ' ') {
343 // Predict the number of hex digits (including "0x" prefix) that
344 // will be output for this address when it is converted to ASCII.
345 size_t chars = 2;
346 uint64_t j = i;
347 do {
348 ++chars;
349 j >>= 4;
350 } while (j);
351
352 // Output the necessary number of space characters to perform
353 // padding. We can't rely on IToASCII() to do that for us, as it
354 // would incorrectly add padding _after_ the "0x" prefix.
355 Pad(buf, sz, &count, pad, padding, chars, &ptr);
356
357 // Inform itoa_r() that it no longer needs to handle the padding.
358 padding = 0;
359 } else {
360 // Adjust for the two-character "0x" prefix.
361 padding = padding >= 2 ? padding - 2 : 0;
362 }
363 }
364
365 // Insert "0x" prefix, if there is still sufficient space in the
366 // output buffer.
367 if (Out(buf, sz, &count, '0'))
368 ++ptr;
369 if (Out(buf, sz, &count, 'x'))
370 ++ptr;
371 break;
372 }
373
374 // No matter what data type this value originated from, print it as
375 // a regular hexadecimal number.
376 IncrementCount(&count,
377 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),
378 16, padding, pad));
379
380 // Advance "ptr" to the end of the string that was just emitted.
381 if (sz - (ptr - buf))
382 while (*ptr)
383 ++ptr;
384 break; }
385 case 's': {
386 // Check that there are arguments left to be inserted.
387 if (cur_arg >= max_args)
388 goto fail_to_expand;
389
390 // Check that the argument has the expected type.
391 const Arg& arg = args[cur_arg++];
392 if (arg.type_ != Arg::STRING)
393 goto fail_to_expand;
394
395 // Apply padding, if needed. This requires us to first check the
396 // length of the string that we are outputting.
397 if (padding) {
398 size_t len = 0;
399 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; )
400 ++len;
401 Pad(buf, sz, &count, ' ', padding, len, &ptr);
402 }
403
404 // Printing a string involves nothing more than copying it into the
405 // output buffer and making sure we don't output more bytes than
406 // available space.
407 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; )
408 if (Out(buf, sz, &count, *src++))
409 ++ptr;
410 break; }
411 case '%':
412 // Quoted percent '%' character.
413 goto copy_verbatim;
414 fail_to_expand:
415 // C++ gives us tools to do type checking -- something that snprintf()
416 // could never really do. So, whenever we see arguments that don't
417 // match up with the format string, we refuse to output them. But
418 // since we have to be extremely conservative about being async-
419 // signal-safe, we are limited in the type of error handling that we
Jeffrey Yasskin 2013/07/30 23:13:53 Is abort() or "*(volatile char*)0 = 0" ok for debu
420 // can do. So, all we do is pass the format string unchanged. That
421 // should eventually get the user's attention.
422 default:
423 // Unknown or unsupported format character. Just copy verbatim to
424 // output.
425 if (Out(buf, sz, &count, '%'))
426 ++ptr;
427 if (!ch)
428 goto end_of_format_string;
429 if (Out(buf, sz, &count, ch))
430 ++ptr;
431 break;
432 }
433 } else {
434 copy_verbatim:
435 if (Out(buf, sz, &count, fmt[-1]))
436 ++ptr;
437 }
438 }
439 end_of_format_string:
440 end_of_output_buffer:
441 *ptr = '\000';
442 IncrementCount(&count);
443 return static_cast<ssize_t>(count)-1;
444 }
445
446 ssize_t FormatN(char* buf, size_t N, const char* fmt) {
447 // Make sure we can write at least one NUL byte.
448 ssize_t n = static_cast<ssize_t>(N);
449 if (n < 1)
450 return -1;
451 size_t count = 0;
452
453 // In the slow-path, we deal with errors by copying the contents of
454 // "fmt" unexpanded. This means, if there are no arguments passed, the
455 // Format() function always degenerates to version of strncpy() that
456 // de-duplicates '%' characters.
457 char* dst = buf;
458 const char* src = fmt;
459 for (; *src; ++src) {
460 char ch = *src;
461 if (!IncrementCount(&count) && n > 1) {
462 --dst;
463 break;
464 }
465 if (n > 1) {
466 --n;
467 *dst++ = ch;
468 }
469 if (ch == '%' && src[1] == '%')
470 ++src;
471 }
472 IncrementCount(&count);
473 *dst = '\000';
474 return static_cast<ssize_t>(count)-1;
475 }
476
477 } // namespace debug
478 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698