OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <limits> | |
6 | |
7 #include "base/debug/format.h" | |
8 | |
9 | |
10 namespace base { | |
11 namespace debug { | |
12 | |
13 // The code in this file is extremely careful to be async-signal-safe. | |
14 // | |
15 // Most obviously, we avoid calling any code that could dynamically allocate | |
16 // memory. Doing so would almost certainly result in bugs and dead-locks. | |
17 // We also avoid calling any other STL functions that could have unintended | |
18 // side-effects involving memory allocation or access to other shared | |
19 // resources. | |
20 // | |
21 // But on top of that, we also avoid calling other library functions, as many | |
22 // of them have the side-effect of calling getenv() (in order to deal with | |
23 // localization) or accessing errno. The latter sounds benign, but there are | |
24 // several execution contexts where it isn't even possible to safely read let | |
25 // alone write errno. | |
26 // | |
27 // The stated design goal of the Format() function is that it can be called | |
28 // from any context that can safely call C or C++ code (i.e. anything that | |
29 // doesn't require assembly code). | |
30 // | |
31 // For a brief overview of some but not all of the issues with async-signal- | |
32 // safety, refer to: | |
33 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html | |
34 | |
35 namespace { | |
36 | |
// Adds |inc| to |*count|, saturating at the largest value representable in
// an ssize_t.
//
// Returns true if the addition was performed. Returns false (and pins
// |*count| to the ssize_t maximum) if the sum would exceed that maximum.
//
// The check is done entirely in unsigned arithmetic and explicitly rejects
// |inc| values above the cap; otherwise "max - inc" would wrap around and the
// overflow would go undetected.
inline bool IncrementCount(size_t* count, size_t inc = 1) {
  const size_t kMaxCount =
      static_cast<size_t>(std::numeric_limits<ssize_t>::max());
  if (inc > kMaxCount || *count > kMaxCount - inc) {
    *count = kMaxCount;
    return false;
  }
  *count += inc;
  return true;
}
46 | |
47 inline bool Out(char* buf, size_t sz, size_t* count, char ch) { | |
48 if (*count + 1 < sz) { | |
49 buf[*count] = ch; | |
50 IncrementCount(count); | |
51 return true; | |
52 } | |
53 IncrementCount(count); | |
54 return false; | |
55 } | |
56 | |
57 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, | |
58 size_t len, char** ptr) { | |
59 char *dst = *ptr; | |
60 for (; padding > len; --padding) | |
61 if (Out(buf, sz, count, pad)) | |
62 ++dst; | |
63 else { | |
64 if (--padding) | |
65 IncrementCount(count, padding-len); | |
66 break; | |
67 } | |
68 *ptr = dst; | |
69 } | |
70 | |
71 // POSIX doesn't define any async-signal safe function for converting | |
72 // an integer to ASCII. Define our own version. | |
73 // | |
74 // This also gives us the ability to make the function a little more powerful | |
75 // and have it deal with padding, with truncation, and with predicting the | |
76 // length of the untruncated output. | |
77 // | |
78 // IToASCII() converts an (optionally signed) integer to ASCII. It never | |
79 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL | |
80 // character is appended, unless "sz" is zero. It returns the number of non-NUL | |
81 // bytes that would be output if no truncation had happened. | |
82 // | |
83 // It supports bases 2 through 16. Padding can be done with either '0' zeros | |
84 // or ' ' spaces. | |
85 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, | |
86 int base, size_t padding, char pad) { | |
87 // Sanity check for the "base". | |
88 if (base < 2 || base > 16 || (sign && base != 10)) { | |
89 if (static_cast<ssize_t>(sz) >= 1) | |
90 buf[0] = '\000'; | |
91 return 0; | |
92 } | |
93 | |
94 // Handle negative numbers, if requested by caller. | |
95 size_t count = 0; | |
96 size_t n = 1; | |
97 char* start = buf; | |
98 int minint = 0; | |
99 bool needs_minus = false; | |
100 uint64_t num; | |
101 if (sign && i < 0) { | |
102 // If we aren't inserting padding, or if we are padding with '0' zeros, | |
103 // we should insert the minus character now. It makes it easier to | |
104 // correctly deal with truncated padded numbers. | |
105 // On the other hand, if we are padding with ' ' spaces, we have to | |
106 // delay outputting the minus character until later. | |
107 if (padding <= 2 || pad == '0') { | |
108 ++count; | |
109 | |
110 // Make sure we can write the '-' character. | |
111 if (++n > sz) { | |
112 if (sz > 0) | |
113 *start = '\000'; | |
114 } else | |
115 *start++ = '-'; | |
116 | |
117 // Adjust padding, since we just output one character already. | |
118 if (padding) | |
119 --padding; | |
120 } else | |
121 needs_minus = true; | |
122 | |
123 // Turn our number positive. | |
124 if (i == -i) { | |
Jeffrey Yasskin
2013/07/30 23:13:53
That's undefined behavior for signed numbers (sorr
| |
125 // The lowest-most negative integer needs special treatment. | |
126 minint = 1; | |
127 num = -(i + 1); | |
128 } else { | |
129 // "Normal" negative numbers are easy. | |
130 num = -i; | |
131 } | |
132 } else | |
133 num = i; | |
134 | |
135 // Loop until we have converted the entire number. Output at least one | |
136 // character (i.e. '0'). | |
137 char* ptr = start; | |
138 bool started = false; | |
139 do { | |
140 // Sanity check. If padding is used to fill the entire address space, | |
141 // don't allow more than MAXINT bytes. | |
Jeffrey Yasskin
2013/07/30 23:13:53
MAXINT != numeric_limits<ssize_t>::max(). SSIZE_MA
| |
142 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { | |
143 break; | |
144 } | |
145 | |
146 // Make sure there is still enough space left in our output buffer. | |
147 if (n == sz) { | |
148 if (ptr > start) { | |
149 // It is rare that we need to output a partial number. But if asked | |
150 // to do so, we will still make sure we output the correct number of | |
151 // leading digits. | |
152 // memmove(start, start+1, --ptr - start) | |
Jeffrey Yasskin
2013/07/30 23:13:53
This is weird. Why do you need to erase the first
| |
153 --ptr; | |
154 for (char* move = start; move < ptr; ++move) | |
155 *move = move[1]; | |
156 } else | |
157 goto cannot_write_anything_but_nul; | |
158 } else | |
159 ++n; | |
160 | |
161 // Output the next digit and (if necessary) compensate for the lowest- | |
Jeffrey Yasskin
2013/07/30 23:13:53
"lowest-most negative integer" should probably be
| |
162 // most negative integer needing special treatment. This works because, | |
163 // no matter the bit width of the integer, the lowest-most decimal | |
164 // integer always ends in 2, 4, 6, or 8. | |
165 if (n <= sz) { | |
166 if (!num && started) | |
167 if (needs_minus) { | |
168 *ptr++ = '-'; | |
169 needs_minus = false; | |
170 } else | |
171 *ptr++ = pad; | |
172 else { | |
173 started = true; | |
174 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") | |
175 [num%base+minint]; | |
176 } | |
177 } | |
178 | |
179 cannot_write_anything_but_nul: | |
180 minint = 0; | |
181 num /= base; | |
182 | |
183 // Add padding, if requested. | |
184 if (padding > 0) { | |
185 --padding; | |
186 | |
187 // Performance optimization for when we are asked to output | |
188 // excessive padding, but our output buffer is limited in size. | |
189 // Even if we output a 128bit number in binary, we would never | |
190 // write more than 130 characters. So, anything beyond this limit | |
191 // and we can compute the result arithmetically. | |
192 if (count > n && count - n > 130) { | |
193 IncrementCount(&count, padding); | |
194 padding = 0; | |
195 } | |
196 } | |
197 } while (num || padding || needs_minus); | |
198 | |
199 // Terminate the output with a NUL character. | |
200 if (sz > 0) | |
201 *ptr = '\000'; | |
202 | |
203 // Conversion to ASCII actually resulted in the digits being in reverse | |
204 // order. We can't easily generate them in forward order, as we can't tell | |
205 // the number of characters needed until we are done converting. | |
206 // So, now, we reverse the string (except for the possible '-' sign). | |
207 while (--ptr > start) { | |
208 char ch = *ptr; | |
209 *ptr = *start; | |
210 *start++ = ch; | |
211 } | |
212 return count; | |
213 } | |
214 | |
215 } // anonymous namespace | |
216 | |
217 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt, | |
218 const Arg* args, const size_t max_args) { | |
219 // Make sure we can write at least one NUL byte. | |
220 if (static_cast<ssize_t>(sz) < 1) | |
221 return -1; | |
222 | |
223 // Iterate over format string and interpret '%' arguments as they are | |
224 // encountered. | |
225 char* ptr = buf; | |
226 size_t padding; | |
227 char pad; | |
228 size_t count = 0; | |
229 for (unsigned int cur_arg = 0; | |
230 *fmt && | |
231 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) { | |
232 if (*fmt++ == '%') { | |
233 padding = 0; | |
234 pad = ' '; | |
235 char ch = *fmt++; | |
236 format_character_found: | |
237 switch (ch) { | |
238 case '0': case '1': case '2': case '3': case '4': | |
239 case '5': case '6': case '7': case '8': case '9': | |
240 // Found a width parameter. Convert to an integer value and store in | |
241 // "padding". If the leading digit is a zero, change the padding | |
242 // character from a space ' ' to a zero '0'. | |
243 pad = ch == '0' ? '0' : ' '; | |
244 for (;;) { | |
245 const size_t max_padding = std::numeric_limits<ssize_t>::max(); | |
246 if (padding > max_padding/10 || | |
247 10*padding > max_padding - (ch - '0')) { | |
248 // Integer overflow detected. Skip the rest of the width until | |
249 // we find the format character, then do the normal error handling. | |
250 while ((ch = *fmt++) >= '0' && ch <= '9') { | |
251 } | |
252 goto fail_to_expand; | |
253 } | |
254 padding = 10*padding + ch - '0'; | |
255 ch = *fmt++; | |
256 if (ch < '0' || ch > '9') { | |
257 // Reached the end of the width parameter. This is where the format | |
258 // character is found. | |
259 goto format_character_found; | |
260 } | |
261 } | |
262 break; | |
263 case 'c': { // Output an ASCII character. | |
264 // Check that there are arguments left to be inserted. | |
265 if (cur_arg >= max_args) | |
266 goto fail_to_expand; | |
267 | |
268 // Check that the argument has the expected type. | |
269 const Arg& arg = args[cur_arg++]; | |
270 if (arg.type_ != Arg::INT && | |
271 arg.type_ != Arg::UINT) | |
272 goto fail_to_expand; | |
273 | |
274 // Apply padding, if needed. | |
275 Pad(buf, sz, &count, ' ', padding, 1, &ptr); | |
276 | |
277 // Convert the argument to an ASCII character and output it. | |
278 char ch = static_cast<char>(arg.i_); | |
279 if (!ch) | |
280 goto end_of_output_buffer; | |
281 if (Out(buf, sz, &count, ch)) | |
282 ++ptr; | |
283 break; } | |
284 case 'd': { // Output a signed or unsigned integer-like value. | |
285 // Check that there are arguments left to be inserted. | |
286 if (cur_arg >= max_args) | |
287 goto fail_to_expand; | |
288 | |
289 // Check that the argument has the expected type. | |
290 const Arg& arg = args[cur_arg++]; | |
291 if (arg.type_ != Arg::INT && | |
292 arg.type_ != Arg::UINT) | |
293 goto fail_to_expand; | |
294 | |
295 // Our implementation of IToASCII() can handle all widths of data types | |
296 // and can print both signed and unsigned values. | |
297 IncrementCount(&count, | |
298 IToASCII(arg.type_ == Arg::INT, false, arg.i_, | |
299 ptr, sz - (ptr - buf), 10, padding, pad)); | |
300 | |
301 // Advance "ptr" to the end of the string that was just emitted. | |
302 if (sz - (ptr - buf)) | |
303 while (*ptr) | |
304 ++ptr; | |
305 break; } | |
306 case 'x': // Output an unsigned hexadecimal value. | |
307 case 'X': | |
308 case 'p': { // Output a pointer value. | |
309 // Check that there are arguments left to be inserted. | |
310 if (cur_arg >= max_args) | |
311 goto fail_to_expand; | |
312 | |
313 const Arg& arg = args[cur_arg++]; | |
314 int64_t i; | |
315 switch (ch) { | |
316 case 'x': // Hexadecimal values are available for integer-like args. | |
317 case 'X': | |
318 // Check that the argument has the expected type. | |
319 if (arg.type_ != Arg::INT && | |
320 arg.type_ != Arg::UINT) | |
321 goto fail_to_expand; | |
322 i = arg.i_; | |
323 | |
324 // The Arg() constructor automatically performed sign expansion on | |
325 // signed parameters. This is great when outputting a %d decimal | |
326 // number, but can result in unexpected leading 0xFF bytes when | |
327 // outputting a %c hexadecimal number. Mask bits, if necessary. | |
328 if (arg.type_ == Arg::INT && arg.width_ < 8) | |
329 i &= ~(static_cast<int64_t>(-1) << (8*arg.width_)); | |
Jeffrey Yasskin
2013/07/30 23:13:53
Heh, alternately, "i &= (1LL << 8*arg.width_) - 1"
| |
330 break; | |
331 default: | |
332 // Pointer values require an actual pointer or a string. | |
333 if (arg.type_ == Arg::POINTER) | |
334 i = reinterpret_cast<uintptr_t>(arg.ptr_); | |
335 else if (arg.type_ == Arg::STRING) | |
336 i = reinterpret_cast<uintptr_t>(arg.s_); | |
337 else | |
338 goto fail_to_expand; | |
339 | |
340 // Pointers always include the "0x" prefix. This affects padding. | |
341 if (padding) { | |
342 if (pad == ' ') { | |
343 // Predict the number of hex digits (including "0x" prefix) that | |
344 // will be output for this address when it is converted to ASCII. | |
345 size_t chars = 2; | |
346 uint64_t j = i; | |
347 do { | |
348 ++chars; | |
349 j >>= 4; | |
350 } while (j); | |
351 | |
352 // Output the necessary number of space characters to perform | |
353 // padding. We can't rely on IToASCII() to do that for us, as it | |
354 // would incorrectly add padding _after_ the "0x" prefix. | |
355 Pad(buf, sz, &count, pad, padding, chars, &ptr); | |
356 | |
357 // Inform itoa_r() that it no longer needs to handle the padding. | |
358 padding = 0; | |
359 } else { | |
360 // Adjust for the two-character "0x" prefix. | |
361 padding = padding >= 2 ? padding - 2 : 0; | |
362 } | |
363 } | |
364 | |
365 // Insert "0x" prefix, if there is still sufficient space in the | |
366 // output buffer. | |
367 if (Out(buf, sz, &count, '0')) | |
368 ++ptr; | |
369 if (Out(buf, sz, &count, 'x')) | |
370 ++ptr; | |
371 break; | |
372 } | |
373 | |
374 // No matter what data type this value originated from, print it as | |
375 // a regular hexadecimal number. | |
376 IncrementCount(&count, | |
377 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf), | |
378 16, padding, pad)); | |
379 | |
380 // Advance "ptr" to the end of the string that was just emitted. | |
381 if (sz - (ptr - buf)) | |
382 while (*ptr) | |
383 ++ptr; | |
384 break; } | |
385 case 's': { | |
386 // Check that there are arguments left to be inserted. | |
387 if (cur_arg >= max_args) | |
388 goto fail_to_expand; | |
389 | |
390 // Check that the argument has the expected type. | |
391 const Arg& arg = args[cur_arg++]; | |
392 if (arg.type_ != Arg::STRING) | |
393 goto fail_to_expand; | |
394 | |
395 // Apply padding, if needed. This requires us to first check the | |
396 // length of the string that we are outputting. | |
397 if (padding) { | |
398 size_t len = 0; | |
399 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src++; ) | |
400 ++len; | |
401 Pad(buf, sz, &count, ' ', padding, len, &ptr); | |
402 } | |
403 | |
404 // Printing a string involves nothing more than copying it into the | |
405 // output buffer and making sure we don't output more bytes than | |
406 // available space. | |
407 for (const char* src = arg.s_ ? arg.s_ : "<NULL>"; *src; ) | |
408 if (Out(buf, sz, &count, *src++)) | |
409 ++ptr; | |
410 break; } | |
411 case '%': | |
412 // Quoted percent '%' character. | |
413 goto copy_verbatim; | |
414 fail_to_expand: | |
415 // C++ gives us tools to do type checking -- something that snprintf() | |
416 // could never really do. So, whenever we see arguments that don't | |
417 // match up with the format string, we refuse to output them. But | |
418 // since we have to be extremely conservative about being async- | |
419 // signal-safe, we are limited in the type of error handling that we | |
Jeffrey Yasskin
2013/07/30 23:13:53
Is abort() or "*(volatile char*)0 = 0" ok for debu
| |
420 // can do. So, all we do is pass the format string unchanged. That | |
421 // should eventually get the user's attention. | |
422 default: | |
423 // Unknown or unsupported format character. Just copy verbatim to | |
424 // output. | |
425 if (Out(buf, sz, &count, '%')) | |
426 ++ptr; | |
427 if (!ch) | |
428 goto end_of_format_string; | |
429 if (Out(buf, sz, &count, ch)) | |
430 ++ptr; | |
431 break; | |
432 } | |
433 } else { | |
434 copy_verbatim: | |
435 if (Out(buf, sz, &count, fmt[-1])) | |
436 ++ptr; | |
437 } | |
438 } | |
439 end_of_format_string: | |
440 end_of_output_buffer: | |
441 *ptr = '\000'; | |
442 IncrementCount(&count); | |
443 return static_cast<ssize_t>(count)-1; | |
444 } | |
445 | |
446 ssize_t FormatN(char* buf, size_t N, const char* fmt) { | |
447 // Make sure we can write at least one NUL byte. | |
448 ssize_t n = static_cast<ssize_t>(N); | |
449 if (n < 1) | |
450 return -1; | |
451 size_t count = 0; | |
452 | |
453 // In the slow-path, we deal with errors by copying the contents of | |
454 // "fmt" unexpanded. This means, if there are no arguments passed, the | |
455 // Format() function always degenerates to version of strncpy() that | |
456 // de-duplicates '%' characters. | |
457 char* dst = buf; | |
458 const char* src = fmt; | |
459 for (; *src; ++src) { | |
460 char ch = *src; | |
461 if (!IncrementCount(&count) && n > 1) { | |
462 --dst; | |
463 break; | |
464 } | |
465 if (n > 1) { | |
466 --n; | |
467 *dst++ = ch; | |
468 } | |
469 if (ch == '%' && src[1] == '%') | |
470 ++src; | |
471 } | |
472 IncrementCount(&count); | |
473 *dst = '\000'; | |
474 return static_cast<ssize_t>(count)-1; | |
475 } | |
476 | |
477 } // namespace debug | |
478 } // namespace base | |
OLD | NEW |