Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Unified Diff: gcc/libcpp/lex.c

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gcc/libcpp/internal.h ('k') | gcc/libcpp/line-map.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gcc/libcpp/lex.c
diff --git a/gcc/libcpp/lex.c b/gcc/libcpp/lex.c
index e08c5bd81436c5fd5e6fd04d57de308b243c9842..f29998225340954e0affb7f59da3c1d66ea51bd9 100644
--- a/gcc/libcpp/lex.c
+++ b/gcc/libcpp/lex.c
@@ -76,7 +76,7 @@ cpp_ideq (const cpp_token *token, const char *string)
if (token->type != CPP_NAME)
return 0;
- return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
+ return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
}
/* Record a note TYPE at byte POS into the current cleaned logical
@@ -314,6 +314,8 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
}
}
}
+ else if (note->type == 0)
+ /* Already processed in lex_raw_string. */;
else
abort ();
}
@@ -540,6 +542,12 @@ lex_identifier_intern (cpp_reader *pfile, const uchar *base)
cpp_error (pfile, CPP_DL_PEDWARN,
"__VA_ARGS__ can only appear in the expansion"
" of a C99 variadic macro");
+
+ /* For -Wc++-compat, warn about use of C++ named operators. */
+ if (result->flags & NODE_WARN_OPERATOR)
+ cpp_error (pfile, CPP_DL_WARNING,
+ "identifier \"%s\" is a special operator name in C++",
+ NODE_NAME (result));
}
return result;
@@ -611,6 +619,12 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
cpp_error (pfile, CPP_DL_PEDWARN,
"__VA_ARGS__ can only appear in the expansion"
" of a C99 variadic macro");
+
+ /* For -Wc++-compat, warn about use of C++ named operators. */
+ if (result->flags & NODE_WARN_OPERATOR)
+ cpp_error (pfile, CPP_DL_WARNING,
+ "identifier \"%s\" is a special operator name in C++",
+ NODE_NAME (result));
}
return result;
@@ -662,12 +676,291 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
token->val.str.text = dest;
}
+/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
+ sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
+
+static void
+bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
+ _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+{
+ _cpp_buff *first_buff = *first_buff_p;
+ _cpp_buff *last_buff = *last_buff_p;
+
+ if (first_buff == NULL)
+ first_buff = last_buff = _cpp_get_buff (pfile, len);
+ else if (len > BUFF_ROOM (last_buff))
+ {
+ size_t room = BUFF_ROOM (last_buff);
+ memcpy (BUFF_FRONT (last_buff), base, room);
+ BUFF_FRONT (last_buff) += room;
+ base += room;
+ len -= room;
+ last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
+ }
+
+ memcpy (BUFF_FRONT (last_buff), base, len);
+ BUFF_FRONT (last_buff) += len;
+
+ *first_buff_p = first_buff;
+ *last_buff_p = last_buff;
+}
+
+/* Lexes a raw string. The stored string contains the spelling, including
+ double quotes, delimiter string, '(' and ')', any leading
+ 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
+ literal, or CPP_OTHER if it was not properly terminated.
+
+ The spelling is NUL-terminated, but it is not guaranteed that this
+ is the first NUL since embedded NULs are preserved. */
+
+static void
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+ const uchar *cur)
+{
+ source_location saw_NUL = 0;
+ const uchar *raw_prefix;
+ unsigned int raw_prefix_len = 0;
+ enum cpp_ttype type;
+ size_t total_len = 0;
+ _cpp_buff *first_buff = NULL, *last_buff = NULL;
+ _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
+
+ type = (*base == 'L' ? CPP_WSTRING :
+ *base == 'U' ? CPP_STRING32 :
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+ : CPP_STRING);
+
+ raw_prefix = cur + 1;
+ while (raw_prefix_len < 16)
+ {
+ switch (raw_prefix[raw_prefix_len])
+ {
+ case ' ': case '(': case ')': case '\\': case '\t':
+ case '\v': case '\f': case '\n': default:
+ break;
+ /* Basic source charset except the above chars. */
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case '_': case '{': case '}': case '#': case '[': case ']':
+ case '<': case '>': case '%': case ':': case ';': case '.':
+ case '?': case '*': case '+': case '-': case '/': case '^':
+ case '&': case '|': case '~': case '!': case '=': case ',':
+ case '"': case '\'':
+ raw_prefix_len++;
+ continue;
+ }
+ break;
+ }
+
+ if (raw_prefix[raw_prefix_len] != '(')
+ {
+ int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
+ + 1;
+ if (raw_prefix_len == 16)
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+ "raw string delimiter longer than 16 characters");
+ else
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+ "invalid character '%c' in raw string delimiter",
+ (int) raw_prefix[raw_prefix_len]);
+ pfile->buffer->cur = raw_prefix - 1;
+ create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
+ return;
+ }
+
+ cur = raw_prefix + raw_prefix_len + 1;
+ for (;;)
+ {
+#define BUF_APPEND(STR,LEN) \
+ do { \
+ bufring_append (pfile, (const uchar *)(STR), (LEN), \
+ &first_buff, &last_buff); \
+ total_len += (LEN); \
+ } while (0);
+
+ cppchar_t c;
+
+ /* If we previously performed any trigraph or line splicing
+ transformations, undo them within the body of the raw string. */
+ while (note->pos < cur)
+ ++note;
+ for (; note->pos == cur; ++note)
+ {
+ switch (note->type)
+ {
+ case '\\':
+ case ' ':
+ /* Restore backslash followed by newline. */
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("\\", 1);
+ after_backslash:
+ if (note->type == ' ')
+ {
+ /* GNU backslash whitespace newline extension. FIXME
+ could be any sequence of non-vertical space. When we
+ can properly restore any such sequence, we should mark
+ this note as handled so _cpp_process_line_notes
+ doesn't warn. */
+ BUF_APPEND (" ", 1);
+ }
+
+ BUF_APPEND ("\n", 1);
+ break;
+
+ case 0:
+ /* Already handled. */
+ break;
+
+ default:
+ if (_cpp_trigraph_map[note->type])
+ {
+ /* Don't warn about this trigraph in
+ _cpp_process_line_notes, since trigraphs show up as
+ trigraphs in raw strings. */
+ uchar type = note->type;
+ note->type = 0;
+
+ if (!CPP_OPTION (pfile, trigraphs))
+ /* If we didn't convert the trigraph in the first
+ place, don't do anything now either. */
+ break;
+
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("??", 2);
+
+ /* ??/ followed by newline gets two line notes, one for
+ the trigraph and one for the backslash/newline. */
+ if (type == '/' && note[1].pos == cur)
+ {
+ if (note[1].type != '\\'
+ && note[1].type != ' ')
+ abort ();
+ BUF_APPEND ("/", 1);
+ ++note;
+ goto after_backslash;
+ }
+ /* The ) from ??) could be part of the suffix. */
+ else if (type == ')'
+ && strncmp ((const char *) cur+1,
+ (const char *) raw_prefix,
+ raw_prefix_len) == 0
+ && cur[raw_prefix_len+1] == '"')
+ {
+ cur += raw_prefix_len+2;
+ goto break_outer_loop;
+ }
+ else
+ {
+ /* Skip the replacement character. */
+ base = ++cur;
+ BUF_APPEND (&type, 1);
+ }
+ }
+ else
+ abort ();
+ break;
+ }
+ }
+ c = *cur++;
+
+ if (c == ')'
+ && strncmp ((const char *) cur, (const char *) raw_prefix,
+ raw_prefix_len) == 0
+ && cur[raw_prefix_len] == '"')
+ {
+ cur += raw_prefix_len + 1;
+ break;
+ }
+ else if (c == '\n')
+ {
+ if (pfile->state.in_directive
+ || pfile->state.parsing_args
+ || pfile->state.in_deferred_pragma)
+ {
+ cur--;
+ type = CPP_OTHER;
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+ "unterminated raw string");
+ break;
+ }
+
+ BUF_APPEND (base, cur - base);
+
+ if (pfile->buffer->cur < pfile->buffer->rlimit)
+ CPP_INCREMENT_LINE (pfile, 0);
+ pfile->buffer->need_line = true;
+
+ pfile->buffer->cur = cur-1;
+ _cpp_process_line_notes (pfile, false);
+ if (!_cpp_get_fresh_line (pfile))
+ {
+ source_location src_loc = token->src_loc;
+ token->type = CPP_EOF;
+ /* Tell the compiler the line number of the EOF token. */
+ token->src_loc = pfile->line_table->highest_line;
+ token->flags = BOL;
+ if (first_buff != NULL)
+ _cpp_release_buff (pfile, first_buff);
+ cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
+ "unterminated raw string");
+ return;
+ }
+
+ cur = base = pfile->buffer->cur;
+ note = &pfile->buffer->notes[pfile->buffer->cur_note];
+ }
+ else if (c == '\0' && !saw_NUL)
+ LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, cur));
+ }
+ break_outer_loop:
+
+ if (saw_NUL && !pfile->state.skipping)
+ cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
+ "null character(s) preserved in literal");
+
+ pfile->buffer->cur = cur;
+ if (first_buff == NULL)
+ create_literal (pfile, token, base, cur - base, type);
+ else
+ {
+ uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+
+ token->type = type;
+ token->val.str.len = total_len + (cur - base);
+ token->val.str.text = dest;
+ last_buff = first_buff;
+ while (last_buff != NULL)
+ {
+ memcpy (dest, last_buff->base,
+ BUFF_FRONT (last_buff) - last_buff->base);
+ dest += BUFF_FRONT (last_buff) - last_buff->base;
+ last_buff = last_buff->next;
+ }
+ _cpp_release_buff (pfile, first_buff);
+ memcpy (dest, base, cur - base);
+ dest[cur - base] = '\0';
+ }
+}
+
/* Lexes a string, character constant, or angle-bracketed header file
name. The stored string contains the spelling, including opening
- quote and leading any leading 'L', 'u' or 'U'. It returns the type
- of the literal, or CPP_OTHER if it was not properly terminated, or
- CPP_LESS for an unterminated header name which must be relexed as
- normal tokens.
+ quote and any leading 'L', 'u', 'U' or 'u8' and optional
+ 'R' modifier. It returns the type of the literal, or CPP_OTHER
+ if it was not properly terminated, or CPP_LESS for an unterminated
+ header name which must be relexed as normal tokens.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
@@ -681,12 +974,24 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
cur = base;
terminator = *cur++;
- if (terminator == 'L' || terminator == 'u' || terminator == 'U')
+ if (terminator == 'L' || terminator == 'U')
terminator = *cur++;
- if (terminator == '\"')
+ else if (terminator == 'u')
+ {
+ terminator = *cur++;
+ if (terminator == '8')
+ terminator = *cur++;
+ }
+ if (terminator == 'R')
+ {
+ lex_raw_string (pfile, token, base, cur);
+ return;
+ }
+ if (terminator == '"')
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
- *base == 'u' ? CPP_STRING16 : CPP_STRING);
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+ : CPP_STRING);
else if (terminator == '\'')
type = (*base == 'L' ? CPP_WCHAR :
*base == 'U' ? CPP_CHAR32 :
@@ -1146,10 +1451,21 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'L':
case 'u':
case 'U':
- /* 'L', 'u' or 'U' may introduce wide characters or strings. */
+ case 'R':
+ /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
+ wide strings or raw strings. */
if (c == 'L' || CPP_OPTION (pfile, uliterals))
{
- if (*buffer->cur == '\'' || *buffer->cur == '"')
+ if ((*buffer->cur == '\'' && c != 'R')
+ || *buffer->cur == '"'
+ || (*buffer->cur == 'R'
+ && c != 'R'
+ && buffer->cur[1] == '"'
+ && CPP_OPTION (pfile, uliterals))
+ || (*buffer->cur == '8'
+ && c == 'u'
+ && (buffer->cur[1] == '"'
+ || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
{
lex_string (pfile, result, buffer->cur - 1);
break;
@@ -1165,22 +1481,22 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'M': case 'N': case 'O': case 'P': case 'Q':
case 'S': case 'T': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
{
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
- result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
- &nst);
+ result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
+ &nst);
warn_about_normalization (pfile, result, &nst);
}
/* Convert named operators to their proper types. */
- if (result->val.node->flags & NODE_OPERATOR)
+ if (result->val.node.node->flags & NODE_OPERATOR)
{
result->flags |= NAMED_OP;
- result->type = (enum cpp_ttype) result->val.node->directive_index;
+ result->type = (enum cpp_ttype) result->val.node.node->directive_index;
}
break;
@@ -1295,7 +1611,7 @@ _cpp_lex_direct (cpp_reader *pfile)
result->flags |= DIGRAPH;
result->type = CPP_HASH;
if (*buffer->cur == '%' && buffer->cur[1] == ':')
- buffer->cur += 2, result->type = CPP_PASTE;
+ buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
}
else if (*buffer->cur == '>')
{
@@ -1376,7 +1692,7 @@ _cpp_lex_direct (cpp_reader *pfile)
case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
- case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
+ case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
case '?': result->type = CPP_QUERY; break;
case '~': result->type = CPP_COMPL; break;
@@ -1401,7 +1717,7 @@ _cpp_lex_direct (cpp_reader *pfile)
if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
- result->val.node = lex_identifier (pfile, base, true, &nst);
+ result->val.node.node = lex_identifier (pfile, base, true, &nst);
warn_about_normalization (pfile, result, &nst);
break;
}
@@ -1427,7 +1743,7 @@ cpp_token_len (const cpp_token *token)
{
default: len = 6; break;
case SPELL_LITERAL: len = token->val.str.len; break;
- case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
+ case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
}
return len;
@@ -1467,6 +1783,13 @@ utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
return ucn_len;
}
+/* Given a token TYPE corresponding to a digraph, return a pointer to
+ the spelling of the digraph. */
+static const unsigned char *
+cpp_digraph2name (enum cpp_ttype type)
+{
+ return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
+}
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
already contain the enough space to hold the token's spelling.
@@ -1486,8 +1809,7 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
unsigned char c;
if (token->flags & DIGRAPH)
- spelling
- = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
+ spelling = cpp_digraph2name (token->type);
else if (token->flags & NAMED_OP)
goto spell_ident;
else
@@ -1502,23 +1824,23 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
case SPELL_IDENT:
if (forstring)
{
- memcpy (buffer, NODE_NAME (token->val.node),
- NODE_LEN (token->val.node));
- buffer += NODE_LEN (token->val.node);
+ memcpy (buffer, NODE_NAME (token->val.node.node),
+ NODE_LEN (token->val.node.node));
+ buffer += NODE_LEN (token->val.node.node);
}
else
{
size_t i;
- const unsigned char * name = NODE_NAME (token->val.node);
+ const unsigned char * name = NODE_NAME (token->val.node.node);
- for (i = 0; i < NODE_LEN (token->val.node); i++)
+ for (i = 0; i < NODE_LEN (token->val.node.node); i++)
if (name[i] & ~0x7F)
{
i += utf8_to_ucn (buffer, name + i) - 1;
buffer += 10;
}
else
- *buffer++ = NODE_NAME (token->val.node)[i];
+ *buffer++ = NODE_NAME (token->val.node.node)[i];
}
break;
@@ -1550,11 +1872,17 @@ cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
return start;
}
-/* Used by C front ends, which really should move to using
- cpp_token_as_text. */
+/* Returns a pointer to a string which spells the token defined by
+ TYPE and FLAGS. Used by C front ends, which really should move to
+ using cpp_token_as_text. */
const char *
-cpp_type2name (enum cpp_ttype type)
+cpp_type2name (enum cpp_ttype type, unsigned char flags)
{
+ if (flags & DIGRAPH)
+ return (const char *) cpp_digraph2name (type);
+ else if (flags & NAMED_OP)
+ return cpp_named_operator2name (type);
+
return (const char *) token_spellings[type].name;
}
@@ -1572,8 +1900,7 @@ cpp_output_token (const cpp_token *token, FILE *fp)
int c;
if (token->flags & DIGRAPH)
- spelling
- = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
+ spelling = cpp_digraph2name (token->type);
else if (token->flags & NAMED_OP)
goto spell_ident;
else
@@ -1590,9 +1917,9 @@ cpp_output_token (const cpp_token *token, FILE *fp)
case SPELL_IDENT:
{
size_t i;
- const unsigned char * name = NODE_NAME (token->val.node);
+ const unsigned char * name = NODE_NAME (token->val.node.node);
- for (i = 0; i < NODE_LEN (token->val.node); i++)
+ for (i = 0; i < NODE_LEN (token->val.node.node); i++)
if (name[i] & ~0x7F)
{
unsigned char buffer[10];
@@ -1600,7 +1927,7 @@ cpp_output_token (const cpp_token *token, FILE *fp)
fwrite (buffer, 1, 10, fp);
}
else
- fputc (NODE_NAME (token->val.node)[i], fp);
+ fputc (NODE_NAME (token->val.node.node)[i], fp);
}
break;
@@ -1623,11 +1950,14 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
{
default: /* Keep compiler happy. */
case SPELL_OPERATOR:
- return 1;
+ /* token_no is used to track where multiple consecutive ##
+ tokens were originally located. */
+ return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
case SPELL_NONE:
- return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
+ return (a->type != CPP_MACRO_ARG
+ || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
case SPELL_IDENT:
- return a->val.node == b->val.node;
+ return a->val.node.node == b->val.node.node;
case SPELL_LITERAL:
return (a->val.str.len == b->val.str.len
&& !memcmp (a->val.str.text, b->val.str.text,
@@ -1937,6 +2267,11 @@ cpp_token_val_index (cpp_token *tok)
return CPP_TOKEN_FLD_NODE;
case SPELL_LITERAL:
return CPP_TOKEN_FLD_STR;
+ case SPELL_OPERATOR:
+ if (tok->type == CPP_PASTE)
+ return CPP_TOKEN_FLD_TOKEN_NO;
+ else
+ return CPP_TOKEN_FLD_NONE;
case SPELL_NONE:
if (tok->type == CPP_MACRO_ARG)
return CPP_TOKEN_FLD_ARG_NO;
« no previous file with comments | « gcc/libcpp/internal.h ('k') | gcc/libcpp/line-map.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698