Index: gcc/gcc/pretty-print.c |
diff --git a/gcc/gcc/pretty-print.c b/gcc/gcc/pretty-print.c |
index b611ec985167c629258d885d7741137b1233d963..56789490931c68e8cbf8d9cb20288b36dbdd7752 100644 |
--- a/gcc/gcc/pretty-print.c |
+++ b/gcc/gcc/pretty-print.c |
@@ -26,6 +26,11 @@ along with GCC; see the file COPYING3. If not see |
#include "intl.h" |
#include "pretty-print.h" |
#include "tree.h" |
+#include "ggc.h" |
+ |
+#if HAVE_ICONV |
+#include <iconv.h> |
+#endif |
#define obstack_chunk_alloc xmalloc |
#define obstack_chunk_free free |
@@ -185,8 +190,6 @@ pp_base_indent (pretty_printer *pp) |
%.*s: a substring the length of which is specified by an argument |
integer. |
%Ns: likewise, but length specified as constant in the format string. |
- %H: location_t. |
- %J: a decl tree, from which DECL_SOURCE_LOCATION will be recorded. |
%K: a statement, from which EXPR_LOCATION and TREE_BLOCK will be recorded. |
Flag 'q': quote formatted text (must come immediately after '%'). |
@@ -230,7 +233,7 @@ pp_base_format (pretty_printer *pp, text_info *text) |
this point. */ |
memset (formatters, 0, sizeof formatters); |
- |
+ |
for (p = text->format_spec; *p; ) |
{ |
while (*p != '\0' && *p != '%') |
@@ -246,7 +249,7 @@ pp_base_format (pretty_printer *pp, text_info *text) |
{ |
case '\0': |
gcc_unreachable (); |
- |
+ |
case '%': |
obstack_1grow (&buffer->chunk_obstack, '%'); |
p++; |
@@ -363,7 +366,7 @@ pp_base_format (pretty_printer *pp, text_info *text) |
gcc_assert (chunk < PP_NL_ARGMAX * 2); |
args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *); |
args[chunk] = 0; |
- |
+ |
/* Set output to the argument obstack, and switch line-wrapping and |
prefixing off. */ |
buffer->obstack = &buffer->chunk_obstack; |
@@ -471,22 +474,6 @@ pp_base_format (pretty_printer *pp, text_info *text) |
(pp, *text->args_ptr, precision, unsigned, "x"); |
break; |
- case 'H': |
- { |
- location_t *locus = va_arg (*text->args_ptr, location_t *); |
- gcc_assert (text->locus != NULL); |
- *text->locus = *locus; |
- } |
- break; |
- |
- case 'J': |
- { |
- tree t = va_arg (*text->args_ptr, tree); |
- gcc_assert (text->locus != NULL); |
- *text->locus = DECL_SOURCE_LOCATION (t); |
- } |
- break; |
- |
case 'K': |
{ |
tree t = va_arg (*text->args_ptr, tree), block; |
@@ -714,6 +701,7 @@ pp_construct (pretty_printer *pp, const char *prefix, int maximum_length) |
pp_line_cutoff (pp) = maximum_length; |
pp_prefixing_rule (pp) = DIAGNOSTICS_SHOW_PREFIX_ONCE; |
pp_set_prefix (pp, prefix); |
+ pp_translate_identifiers (pp) = true; |
} |
/* Append a string delimited by START and END to the output area of |
@@ -844,3 +832,234 @@ pp_base_maybe_space (pretty_printer *pp) |
pp_base (pp)->padding = pp_none; |
} |
} |
+ |
+/* Print the identifier ID to PRETTY-PRINTER. */ |
+ |
+void |
+pp_base_tree_identifier (pretty_printer *pp, tree id) |
+{ |
+ if (pp_translate_identifiers (pp)) |
+ { |
+ const char *text = identifier_to_locale (IDENTIFIER_POINTER (id)); |
+ pp_append_text (pp, text, text + strlen (text)); |
+ } |
+ else |
+ pp_append_text (pp, IDENTIFIER_POINTER (id), |
+ IDENTIFIER_POINTER (id) + IDENTIFIER_LENGTH (id)); |
+} |
+ |
+/* The string starting at P has LEN (at least 1) bytes left; if they |
+ start with a valid UTF-8 sequence, return the length of that |
+ sequence and set *VALUE to the value of that sequence, and |
+ otherwise return 0 and set *VALUE to (unsigned int) -1. */ |
+ |
+static int |
+decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value) |
+{ |
+ unsigned int t = *p; |
+ |
+ if (len == 0) |
+ abort (); |
+ if (t & 0x80) |
+ { |
+ size_t utf8_len = 0; |
+ unsigned int ch; |
+ size_t i; |
+ for (t = *p; t & 0x80; t <<= 1) |
+ utf8_len++; |
+ |
+ if (utf8_len > len || utf8_len < 2 || utf8_len > 6) |
+ { |
+ *value = (unsigned int) -1; |
+ return 0; |
+ } |
+ ch = *p & ((1 << (7 - utf8_len)) - 1); |
+ for (i = 1; i < utf8_len; i++) |
+ { |
+ unsigned int u = p[i]; |
+ if ((u & 0xC0) != 0x80) |
+ { |
+ *value = (unsigned int) -1; |
+ return 0; |
+ } |
+ ch = (ch << 6) | (u & 0x3F); |
+ } |
+ if ( (ch <= 0x7F && utf8_len > 1) |
+ || (ch <= 0x7FF && utf8_len > 2) |
+ || (ch <= 0xFFFF && utf8_len > 3) |
+ || (ch <= 0x1FFFFF && utf8_len > 4) |
+ || (ch <= 0x3FFFFFF && utf8_len > 5) |
+ || (ch >= 0xD800 && ch <= 0xDFFF)) |
+ { |
+ *value = (unsigned int) -1; |
+ return 0; |
+ } |
+ *value = ch; |
+ return utf8_len; |
+ } |
+ else |
+ { |
+ *value = t; |
+ return 1; |
+ } |
+} |
+ |
+/* Given IDENT, an identifier in the internal encoding, return a |
+ version of IDENT suitable for diagnostics in the locale character |
+ set: either IDENT itself, or a garbage-collected string converted |
+ to the locale character set and using escape sequences if not |
+ representable in the locale character set or containing control |
+ characters or invalid byte sequences. Existing backslashes in |
+ IDENT are not doubled, so the result may not uniquely specify the |
+ contents of an arbitrary byte sequence identifier. */ |
+ |
+const char * |
+identifier_to_locale (const char *ident) |
+{ |
+ const unsigned char *uid = (const unsigned char *) ident; |
+ size_t idlen = strlen (ident); |
+ bool valid_printable_utf8 = true; |
+ bool all_ascii = true; |
+ size_t i; |
+ |
+ for (i = 0; i < idlen;) |
+ { |
+ unsigned int c; |
+ size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c); |
+ if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F)) |
+ { |
+ valid_printable_utf8 = false; |
+ break; |
+ } |
+ if (utf8_len > 1) |
+ all_ascii = false; |
+ i += utf8_len; |
+ } |
+ |
+ /* If IDENT contains invalid UTF-8 sequences (which may occur with |
+ attributes putting arbitrary byte sequences in identifiers), or |
+ control characters, we use octal escape sequences for all bytes |
+ outside printable ASCII. */ |
+ if (!valid_printable_utf8) |
+ { |
+ char *ret = GGC_NEWVEC (char, 4 * idlen + 1); |
+ char *p = ret; |
+ for (i = 0; i < idlen; i++) |
+ { |
+ if (uid[i] > 0x1F && uid[i] < 0x7F) |
+ *p++ = uid[i]; |
+ else |
+ { |
+ sprintf (p, "\\%03o", uid[i]); |
+ p += 4; |
+ } |
+ } |
+ *p = 0; |
+ return ret; |
+ } |
+ |
+ /* Otherwise, if it is valid printable ASCII, or printable UTF-8 |
+ with the locale character set being UTF-8, IDENT is used. */ |
+ if (all_ascii || locale_utf8) |
+ return ident; |
+ |
+ /* Otherwise IDENT is converted to the locale character set if |
+ possible. */ |
+#if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV |
+ if (locale_encoding != NULL) |
+ { |
+ iconv_t cd = iconv_open (locale_encoding, "UTF-8"); |
+ bool conversion_ok = true; |
+ char *ret = NULL; |
+ if (cd != (iconv_t) -1) |
+ { |
+ size_t ret_alloc = 4 * idlen + 1; |
+ for (;;) |
+ { |
+ /* Repeat the whole conversion process as needed with |
+ larger buffers so non-reversible transformations can |
+ always be detected. */ |
+ ICONV_CONST char *inbuf = CONST_CAST (char *, ident); |
+ char *outbuf; |
+ size_t inbytesleft = idlen; |
+ size_t outbytesleft = ret_alloc - 1; |
+ size_t iconv_ret; |
+ |
+ ret = GGC_NEWVEC (char, ret_alloc); |
+ outbuf = ret; |
+ |
+ if (iconv (cd, 0, 0, 0, 0) == (size_t) -1) |
+ { |
+ conversion_ok = false; |
+ break; |
+ } |
+ |
+ iconv_ret = iconv (cd, &inbuf, &inbytesleft, |
+ &outbuf, &outbytesleft); |
+ if (iconv_ret == (size_t) -1 || inbytesleft != 0) |
+ { |
+ if (errno == E2BIG) |
+ { |
+ ret_alloc *= 2; |
+ ggc_free (ret); |
+ ret = NULL; |
+ continue; |
+ } |
+ else |
+ { |
+ conversion_ok = false; |
+ break; |
+ } |
+ } |
+ else if (iconv_ret != 0) |
+ { |
+ conversion_ok = false; |
+ break; |
+ } |
+ /* Return to initial shift state. */ |
+ if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1) |
+ { |
+ if (errno == E2BIG) |
+ { |
+ ret_alloc *= 2; |
+ ggc_free (ret); |
+ ret = NULL; |
+ continue; |
+ } |
+ else |
+ { |
+ conversion_ok = false; |
+ break; |
+ } |
+ } |
+ *outbuf = 0; |
+ break; |
+ } |
+ iconv_close (cd); |
+ if (conversion_ok) |
+ return ret; |
+ } |
+ } |
+#endif |
+ |
+ /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */ |
+ { |
+ char *ret = GGC_NEWVEC (char, 10 * idlen + 1); |
+ char *p = ret; |
+ for (i = 0; i < idlen;) |
+ { |
+ unsigned int c; |
+ size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c); |
+ if (utf8_len == 1) |
+ *p++ = uid[i]; |
+ else |
+ { |
+ sprintf (p, "\\U%08x", c); |
+ p += 10; |
+ } |
+ i += utf8_len; |
+ } |
+ *p = 0; |
+ return ret; |
+ } |
+} |