gcc/libcpp/lex.c - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/libcpp/lex.c

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/libcpp/lex.c

diff --git a/gcc/libcpp/lex.c b/gcc/libcpp/lex.c

index e08c5bd81436c5fd5e6fd04d57de308b243c9842..f29998225340954e0affb7f59da3c1d66ea51bd9 100644

--- a/gcc/libcpp/lex.c

+++ b/gcc/libcpp/lex.c

@@ -76,7 +76,7 @@ cpp_ideq (const cpp_token *token, const char *string)

if (token->type != CPP_NAME)

return 0;

- return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);

+ return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);

}

/* Record a note TYPE at byte POS into the current cleaned logical

@@ -314,6 +314,8 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)

}

+ else if (note->type == 0)

+ /* Already processed in lex_raw_string. */;

else

abort ();

}

@@ -540,6 +542,12 @@ lex_identifier_intern (cpp_reader *pfile, const uchar *base)

cpp_error (pfile, CPP_DL_PEDWARN,

"__VA_ARGS__ can only appear in the expansion"

" of a C99 variadic macro");

+ /* For -Wc++-compat, warn about use of C++ named operators. */

+ if (result->flags & NODE_WARN_OPERATOR)

+ cpp_error (pfile, CPP_DL_WARNING,

+ "identifier \"%s\" is a special operator name in C++",

+ NODE_NAME (result));

}

return result;

@@ -611,6 +619,12 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,

cpp_error (pfile, CPP_DL_PEDWARN,

"__VA_ARGS__ can only appear in the expansion"

" of a C99 variadic macro");

+ /* For -Wc++-compat, warn about use of C++ named operators. */

+ if (result->flags & NODE_WARN_OPERATOR)

+ cpp_error (pfile, CPP_DL_WARNING,

+ "identifier \"%s\" is a special operator name in C++",

+ NODE_NAME (result));

}

return result;

@@ -662,12 +676,291 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,

token->val.str.text = dest;

}

+/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer

+ sequence from *FIRST_BUFF_P to LAST_BUFF_P. */

+static void

+bufring_append (cpp_reader *pfile, const uchar *base, size_t len,

+ _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)

+ _cpp_buff *first_buff = *first_buff_p;

+ _cpp_buff *last_buff = *last_buff_p;

+ if (first_buff == NULL)

+ first_buff = last_buff = _cpp_get_buff (pfile, len);

+ else if (len > BUFF_ROOM (last_buff))

+ {

+ size_t room = BUFF_ROOM (last_buff);

+ memcpy (BUFF_FRONT (last_buff), base, room);

+ BUFF_FRONT (last_buff) += room;

+ base += room;

+ len -= room;

+ last_buff = _cpp_append_extend_buff (pfile, last_buff, len);

+ }

+ memcpy (BUFF_FRONT (last_buff), base, len);

+ BUFF_FRONT (last_buff) += len;

+ *first_buff_p = first_buff;

+ *last_buff_p = last_buff;

+/* Lexes a raw string. The stored string contains the spelling, including

+ double quotes, delimiter string, '(' and ')', any leading

+ 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the

+ literal, or CPP_OTHER if it was not properly terminated.

+ The spelling is NUL-terminated, but it is not guaranteed that this

+ is the first NUL since embedded NULs are preserved. */

+static void

+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,

+ const uchar *cur)

+ source_location saw_NUL = 0;

+ const uchar *raw_prefix;

+ unsigned int raw_prefix_len = 0;

+ enum cpp_ttype type;

+ size_t total_len = 0;

+ _cpp_buff *first_buff = NULL, *last_buff = NULL;

+ _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];

+ type = (*base == 'L' ? CPP_WSTRING :

+ *base == 'U' ? CPP_STRING32 :

+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)

+ : CPP_STRING);

+ raw_prefix = cur + 1;

+ while (raw_prefix_len < 16)

+ {

+ switch (raw_prefix[raw_prefix_len])

+ {

+ case ' ': case '(': case ')': case '\\': case '\t':

+ case '\v': case '\f': case '\n': default:

+ break;

+ /* Basic source charset except the above chars. */

+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':

+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':

+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':

+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':

+ case 'y': case 'z':

+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':

+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':

+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':

+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':

+ case 'Y': case 'Z':

+ case '0': case '1': case '2': case '3': case '4': case '5':

+ case '6': case '7': case '8': case '9':

+ case '_': case '{': case '}': case '#': case '[': case ']':

+ case '<': case '>': case '%': case ':': case ';': case '.':

+ case '?': case '*': case '+': case '-': case '/': case '^':

+ case '&': case '|': case '~': case '!': case '=': case ',':

+ case '"': case '\'':

+ raw_prefix_len++;

+ continue;

+ }

+ break;

+ }

+ if (raw_prefix[raw_prefix_len] != '(')

+ {

+ int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)

+ + 1;

+ if (raw_prefix_len == 16)

+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,

+ "raw string delimiter longer than 16 characters");

+ else

+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,

+ "invalid character '%c' in raw string delimiter",

+ (int) raw_prefix[raw_prefix_len]);

+ pfile->buffer->cur = raw_prefix - 1;

+ create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);

+ return;

+ }

+ cur = raw_prefix + raw_prefix_len + 1;

+ for (;;)

+ {

+#define BUF_APPEND(STR,LEN) \

+ do { \

+ bufring_append (pfile, (const uchar *)(STR), (LEN), \

+ &first_buff, &last_buff); \

+ total_len += (LEN); \

+ } while (0);

+ cppchar_t c;

+ /* If we previously performed any trigraph or line splicing

+ transformations, undo them within the body of the raw string. */

+ while (note->pos < cur)

+ ++note;

+ for (; note->pos == cur; ++note)

+ {

+ switch (note->type)

+ {

+ case '\\':

+ case ' ':

+ /* Restore backslash followed by newline. */

+ BUF_APPEND (base, cur - base);

+ base = cur;

+ BUF_APPEND ("\\", 1);

+ after_backslash:

+ if (note->type == ' ')

+ {

+ /* GNU backslash whitespace newline extension. FIXME

+ could be any sequence of non-vertical space. When we

+ can properly restore any such sequence, we should mark

+ this note as handled so _cpp_process_line_notes

+ doesn't warn. */

+ BUF_APPEND (" ", 1);

+ }

+ BUF_APPEND ("\n", 1);

+ break;

+ case 0:

+ /* Already handled. */

+ break;

+ default:

+ if (_cpp_trigraph_map[note->type])

+ {

+ /* Don't warn about this trigraph in

+ _cpp_process_line_notes, since trigraphs show up as

+ trigraphs in raw strings. */

+ uchar type = note->type;

+ note->type = 0;

+ if (!CPP_OPTION (pfile, trigraphs))

+ /* If we didn't convert the trigraph in the first

+ place, don't do anything now either. */

+ break;

+ BUF_APPEND (base, cur - base);

+ base = cur;

+ BUF_APPEND ("??", 2);

+ /* ??/ followed by newline gets two line notes, one for

+ the trigraph and one for the backslash/newline. */

+ if (type == '/' && note[1].pos == cur)

+ {

+ if (note[1].type != '\\'

+ && note[1].type != ' ')

+ abort ();

+ BUF_APPEND ("/", 1);

+ ++note;

+ goto after_backslash;

+ }

+ /* The ) from ??) could be part of the suffix. */

+ else if (type == ')'

+ && strncmp ((const char *) cur+1,

+ (const char *) raw_prefix,

+ raw_prefix_len) == 0

+ && cur[raw_prefix_len+1] == '"')

+ {

+ cur += raw_prefix_len+2;

+ goto break_outer_loop;

+ }

+ else

+ {

+ /* Skip the replacement character. */

+ base = ++cur;

+ BUF_APPEND (&type, 1);

+ }

+ else

+ abort ();

+ break;

+ }

+ c = *cur++;

+ if (c == ')'

+ && strncmp ((const char *) cur, (const char *) raw_prefix,

+ raw_prefix_len) == 0

+ && cur[raw_prefix_len] == '"')

+ {

+ cur += raw_prefix_len + 1;

+ break;

+ }

+ else if (c == '\n')

+ {

+ if (pfile->state.in_directive

+ || pfile->state.parsing_args

+ || pfile->state.in_deferred_pragma)

+ {

+ cur--;

+ type = CPP_OTHER;

+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,

+ "unterminated raw string");

+ break;

+ }

+ BUF_APPEND (base, cur - base);

+ if (pfile->buffer->cur < pfile->buffer->rlimit)

+ CPP_INCREMENT_LINE (pfile, 0);

+ pfile->buffer->need_line = true;

+ pfile->buffer->cur = cur-1;

+ _cpp_process_line_notes (pfile, false);

+ if (!_cpp_get_fresh_line (pfile))

+ {

+ source_location src_loc = token->src_loc;

+ token->type = CPP_EOF;

+ /* Tell the compiler the line number of the EOF token. */

+ token->src_loc = pfile->line_table->highest_line;

+ token->flags = BOL;

+ if (first_buff != NULL)

+ _cpp_release_buff (pfile, first_buff);

+ cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,

+ "unterminated raw string");

+ return;

+ }

+ cur = base = pfile->buffer->cur;

+ note = &pfile->buffer->notes[pfile->buffer->cur_note];

+ }

+ else if (c == '\0' && !saw_NUL)

+ LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,

+ CPP_BUF_COLUMN (pfile->buffer, cur));

+ }

+ break_outer_loop:

+ if (saw_NUL && !pfile->state.skipping)

+ cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,

+ "null character(s) preserved in literal");

+ pfile->buffer->cur = cur;

+ if (first_buff == NULL)

+ create_literal (pfile, token, base, cur - base, type);

+ else

+ {

+ uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);

+ token->type = type;

+ token->val.str.len = total_len + (cur - base);

+ token->val.str.text = dest;

+ last_buff = first_buff;

+ while (last_buff != NULL)

+ {

+ memcpy (dest, last_buff->base,

+ BUFF_FRONT (last_buff) - last_buff->base);

+ dest += BUFF_FRONT (last_buff) - last_buff->base;

+ last_buff = last_buff->next;

+ }

+ _cpp_release_buff (pfile, first_buff);

+ memcpy (dest, base, cur - base);

+ dest[cur - base] = '\0';

+ }

/* Lexes a string, character constant, or angle-bracketed header file

name. The stored string contains the spelling, including opening

- quote and leading any leading 'L', 'u' or 'U'. It returns the type

- of the literal, or CPP_OTHER if it was not properly terminated, or

- CPP_LESS for an unterminated header name which must be relexed as

- normal tokens.

+ quote and any leading 'L', 'u', 'U' or 'u8' and optional

+ 'R' modifier. It returns the type of the literal, or CPP_OTHER

+ if it was not properly terminated, or CPP_LESS for an unterminated

+ header name which must be relexed as normal tokens.

The spelling is NUL-terminated, but it is not guaranteed that this

is the first NUL since embedded NULs are preserved. */

@@ -681,12 +974,24 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)

cur = base;

terminator = *cur++;

- if (terminator == 'L' || terminator == 'u' || terminator == 'U')

+ if (terminator == 'L' || terminator == 'U')

terminator = *cur++;

- if (terminator == '\"')

+ else if (terminator == 'u')

+ {

+ terminator = *cur++;

+ if (terminator == '8')

+ terminator = *cur++;

+ }

+ if (terminator == 'R')

+ {

+ lex_raw_string (pfile, token, base, cur);

+ return;

+ }

+ if (terminator == '"')

type = (*base == 'L' ? CPP_WSTRING :

*base == 'U' ? CPP_STRING32 :

- *base == 'u' ? CPP_STRING16 : CPP_STRING);

+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)

+ : CPP_STRING);

else if (terminator == '\'')

type = (*base == 'L' ? CPP_WCHAR :

*base == 'U' ? CPP_CHAR32 :

@@ -1146,10 +1451,21 @@ _cpp_lex_direct (cpp_reader *pfile)

case 'L':

case 'u':

case 'U':

- /* 'L', 'u' or 'U' may introduce wide characters or strings. */

+ case 'R':

+ /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,

+ wide strings or raw strings. */

if (c == 'L' || CPP_OPTION (pfile, uliterals))

{

- if (*buffer->cur == '\'' || *buffer->cur == '"')

+ if ((*buffer->cur == '\'' && c != 'R')

+ || *buffer->cur == '"'

+ || (*buffer->cur == 'R'

+ && c != 'R'

+ && buffer->cur[1] == '"'

+ && CPP_OPTION (pfile, uliterals))

+ || (*buffer->cur == '8'

+ && c == 'u'

+ && (buffer->cur[1] == '"'

+ || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))

{

lex_string (pfile, result, buffer->cur - 1);

break;

@@ -1165,22 +1481,22 @@ _cpp_lex_direct (cpp_reader *pfile)

case 'y': case 'z':

case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':

case 'G': case 'H': case 'I': case 'J': case 'K':

- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':

+ case 'M': case 'N': case 'O': case 'P': case 'Q':

case 'S': case 'T': case 'V': case 'W': case 'X':

case 'Y': case 'Z':

result->type = CPP_NAME;

{

struct normalize_state nst = INITIAL_NORMALIZE_STATE;

- result->val.node = lex_identifier (pfile, buffer->cur - 1, false,

- &nst);

+ result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,

+ &nst);

warn_about_normalization (pfile, result, &nst);

}

/* Convert named operators to their proper types. */

- if (result->val.node->flags & NODE_OPERATOR)

+ if (result->val.node.node->flags & NODE_OPERATOR)

{

result->flags |= NAMED_OP;

- result->type = (enum cpp_ttype) result->val.node->directive_index;

+ result->type = (enum cpp_ttype) result->val.node.node->directive_index;

}

break;

@@ -1295,7 +1611,7 @@ _cpp_lex_direct (cpp_reader *pfile)

result->flags |= DIGRAPH;

result->type = CPP_HASH;

if (*buffer->cur == '%' && buffer->cur[1] == ':')

- buffer->cur += 2, result->type = CPP_PASTE;

+ buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;

}

else if (*buffer->cur == '>')

{

@@ -1376,7 +1692,7 @@ _cpp_lex_direct (cpp_reader *pfile)

case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;

case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;

case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;

- case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

+ case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

case '?': result->type = CPP_QUERY; break;

case '~': result->type = CPP_COMPL; break;

@@ -1401,7 +1717,7 @@ _cpp_lex_direct (cpp_reader *pfile)

if (forms_identifier_p (pfile, true, &nst))

{

result->type = CPP_NAME;

- result->val.node = lex_identifier (pfile, base, true, &nst);

+ result->val.node.node = lex_identifier (pfile, base, true, &nst);

warn_about_normalization (pfile, result, &nst);

break;

}

@@ -1427,7 +1743,7 @@ cpp_token_len (const cpp_token *token)

{

default: len = 6; break;

case SPELL_LITERAL: len = token->val.str.len; break;

- case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;

+ case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;

}

return len;

@@ -1467,6 +1783,13 @@ utf8_to_ucn (unsigned char *buffer, const unsigned char *name)

return ucn_len;

}

+/* Given a token TYPE corresponding to a digraph, return a pointer to

+ the spelling of the digraph. */

+static const unsigned char *

+cpp_digraph2name (enum cpp_ttype type)

+ return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];

/* Write the spelling of a token TOKEN to BUFFER. The buffer must

already contain the enough space to hold the token's spelling.

@@ -1486,8 +1809,7 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,

unsigned char c;

if (token->flags & DIGRAPH)

- spelling

- = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];

+ spelling = cpp_digraph2name (token->type);

else if (token->flags & NAMED_OP)

goto spell_ident;

else

@@ -1502,23 +1824,23 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,

case SPELL_IDENT:

if (forstring)

{

- memcpy (buffer, NODE_NAME (token->val.node),

- NODE_LEN (token->val.node));

- buffer += NODE_LEN (token->val.node);

+ memcpy (buffer, NODE_NAME (token->val.node.node),

+ NODE_LEN (token->val.node.node));

+ buffer += NODE_LEN (token->val.node.node);

}

else

{

size_t i;

- const unsigned char * name = NODE_NAME (token->val.node);

+ const unsigned char * name = NODE_NAME (token->val.node.node);

- for (i = 0; i < NODE_LEN (token->val.node); i++)

+ for (i = 0; i < NODE_LEN (token->val.node.node); i++)

if (name[i] & ~0x7F)

{

i += utf8_to_ucn (buffer, name + i) - 1;

buffer += 10;

}

else

- *buffer++ = NODE_NAME (token->val.node)[i];

+ *buffer++ = NODE_NAME (token->val.node.node)[i];

}

break;

@@ -1550,11 +1872,17 @@ cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)

return start;

}

-/* Used by C front ends, which really should move to using

- cpp_token_as_text. */

+/* Returns a pointer to a string which spells the token defined by

+ TYPE and FLAGS. Used by C front ends, which really should move to

+ using cpp_token_as_text. */

const char *

-cpp_type2name (enum cpp_ttype type)

+cpp_type2name (enum cpp_ttype type, unsigned char flags)

{

+ if (flags & DIGRAPH)

+ return (const char *) cpp_digraph2name (type);

+ else if (flags & NAMED_OP)

+ return cpp_named_operator2name (type);

return (const char *) token_spellings[type].name;

}

@@ -1572,8 +1900,7 @@ cpp_output_token (const cpp_token *token, FILE *fp)

int c;

if (token->flags & DIGRAPH)

- spelling

- = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];

+ spelling = cpp_digraph2name (token->type);

else if (token->flags & NAMED_OP)

goto spell_ident;

else

@@ -1590,9 +1917,9 @@ cpp_output_token (const cpp_token *token, FILE *fp)

case SPELL_IDENT:

{

size_t i;

- const unsigned char * name = NODE_NAME (token->val.node);

+ const unsigned char * name = NODE_NAME (token->val.node.node);

- for (i = 0; i < NODE_LEN (token->val.node); i++)

+ for (i = 0; i < NODE_LEN (token->val.node.node); i++)

if (name[i] & ~0x7F)

{

unsigned char buffer[10];

@@ -1600,7 +1927,7 @@ cpp_output_token (const cpp_token *token, FILE *fp)

fwrite (buffer, 1, 10, fp);

}

else

- fputc (NODE_NAME (token->val.node)[i], fp);

+ fputc (NODE_NAME (token->val.node.node)[i], fp);

}

break;

@@ -1623,11 +1950,14 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)

{

default: /* Keep compiler happy. */

case SPELL_OPERATOR:

- return 1;

+ /* token_no is used to track where multiple consecutive ##

+ tokens were originally located. */

+ return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);

case SPELL_NONE:

- return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);

+ return (a->type != CPP_MACRO_ARG

+ || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);

case SPELL_IDENT:

- return a->val.node == b->val.node;

+ return a->val.node.node == b->val.node.node;

case SPELL_LITERAL:

return (a->val.str.len == b->val.str.len

&& !memcmp (a->val.str.text, b->val.str.text,

@@ -1937,6 +2267,11 @@ cpp_token_val_index (cpp_token *tok)

return CPP_TOKEN_FLD_NODE;

case SPELL_LITERAL:

return CPP_TOKEN_FLD_STR;

+ case SPELL_OPERATOR:

+ if (tok->type == CPP_PASTE)

+ return CPP_TOKEN_FLD_TOKEN_NO;

+ else

+ return CPP_TOKEN_FLD_NONE;

case SPELL_NONE:

if (tok->type == CPP_MACRO_ARG)

return CPP_TOKEN_FLD_ARG_NO;

« no previous file with comments | « gcc/libcpp/internal.h ('k') | gcc/libcpp/line-map.c » ('j') | no next file with comments »