gcc/libcpp/charset.c - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/libcpp/charset.c

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/libcpp/charset.c

diff --git a/gcc/libcpp/charset.c b/gcc/libcpp/charset.c

index e743b1e277fcd412972bde2d70871697075ca028..304efc8de0d55672c80fec7119136a4b351e3531 100644

--- a/gcc/libcpp/charset.c

+++ b/gcc/libcpp/charset.c

@@ -169,7 +169,7 @@ static inline int

one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,

cppchar_t *cp)

{

- static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };

+ static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };

static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

cppchar_t c;

@@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)

pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);

pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);

+ pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);

+ pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);

pfile->char16_cset_desc = init_iconv_desc (pfile,

be ? "UTF-16BE" : "UTF-16LE",

SOURCE_CHARSET);

@@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)

{

if (pfile->narrow_cset_desc.func == convert_using_iconv)

iconv_close (pfile->narrow_cset_desc.cd);

+ if (pfile->utf8_cset_desc.func == convert_using_iconv)

+ iconv_close (pfile->utf8_cset_desc.cd);

+ if (pfile->char16_cset_desc.func == convert_using_iconv)

+ iconv_close (pfile->char16_cset_desc.cd);

+ if (pfile->char32_cset_desc.func == convert_using_iconv)

+ iconv_close (pfile->char32_cset_desc.cd);

if (pfile->wide_cset_desc.func == convert_using_iconv)

iconv_close (pfile->wide_cset_desc.cd);

}

@@ -948,10 +956,16 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,

ISO/IEC 10646 is NNNNNNNN; the character designated by the

universal character name \uNNNN is that character whose character

short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value

- for a universal character name is less than 0x20 or in the range

- 0x7F-0x9F (inclusive), or if the universal character name

- designates a character in the basic source character set, then the

- program is ill-formed.

+ for a universal character name corresponds to a surrogate code point

+ (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.

+ Additionally, if the hexadecimal value for a universal-character-name

+ outside a character or string literal corresponds to a control character

+ (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a

+ character in the basic source character set, the program is ill-formed.

+ C99 6.4.3: A universal character name shall not specify a character

+ whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),

+ or 0060 (`), nor one in the range D800 through DFFF inclusive.

*PSTR must be preceded by "\u" or "\U"; it is assumed that the

buffer end is delimited by a non-hex digit. Returns zero if the

@@ -1018,9 +1032,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,

(int) (str - base), base);

result = 1;

}

- /* The standard permits $, @ and ` to be specified as UCNs. We use

- hex escapes so that this also works with EBCDIC hosts. */

+ /* The C99 standard permits $, @ and ` to be specified as UCNs. We use

+ hex escapes so that this also works with EBCDIC hosts.

+ C++0x permits everything below 0xa0 within literals;

+ ucn_valid_in_identifier will complain about identifiers. */

else if ((result < 0xa0

+ && !CPP_OPTION (pfile, cplusplus)

&& (result != 0x24 && result != 0x40 && result != 0x60))

|| (result & 0x80000000)

|| (result >= 0xD800 && result <= 0xDFFF))

@@ -1301,7 +1318,7 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,

unknown:

if (ISGRAPH (c))

cpp_error (pfile, CPP_DL_PEDWARN,

- "unknown escape sequence '\\%c'", (int) c);

+ "unknown escape sequence: '\\%c'", (int) c);

else

{

/* diagnostic.c does not support "%03o". When it does, this

@@ -1330,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)

{

default:

return pfile->narrow_cset_desc;

+ case CPP_UTF8STRING:

+ return pfile->utf8_cset_desc;

case CPP_CHAR16:

case CPP_STRING16:

return pfile->char16_cset_desc;

@@ -1364,7 +1383,34 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,

for (i = 0; i < count; i++)

{

p = from[i].text;

- if (*p == 'L' || *p == 'u' || *p == 'U') p++;

+ if (*p == 'u')

+ {

+ if (*++p == '8')

+ p++;

+ }

+ else if (*p == 'L' || *p == 'U') p++;

+ if (*p == 'R')

+ {

+ const uchar *prefix;

+ /* Skip over 'R"'. */

+ p += 2;

+ prefix = p;

+ while (*p != '(')

+ p++;

+ limit = from[i].text + from[i].len;

+ if (limit >= p + (p - prefix) + 1)

+ limit -= (p - prefix) + 1;

+ /* Raw strings are all normal characters; these can be fed

+ directly to convert_cset. */

+ if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))

+ goto fail;

+ continue;

+ }

p++; /* Skip leading quote. */

limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */

« no previous file with comments | « gcc/libcpp/ChangeLog ('k') | gcc/libcpp/expr.c » ('j') | no next file with comments »