From: Neil Booth Date: Wed, 23 May 2001 22:50:28 +0000 (+0000) Subject: cpp.texi: Update for handling of charconsts. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f8710242d38de963d4d8204af1058cef8c05472b;p=gcc.git cpp.texi: Update for handling of charconsts. * cpp.texi: Update for handling of charconsts. * cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update diagnostics. Skip to the end if the UCS is too short. (cpp_interpret_charconst): Long charconsts issue a warning not an error. * gcc.dg/cpp/charconst.c: New tests. * gcc.dg/cpp/escape.c: New tests. * gcc.dg/cpp/escape-1.c: New tests. * gcc.dg/cpp/escape-2.c: New tests. * gcc.dg/cpp/ucs.c: New tests. From-SVN: r42514 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a29771e2be9..14102e11d02 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2001-05-23 Neil Booth + + * cpp.texi: Update for handling of charconsts. + * cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update + diagnostics. Skip to the end if the UCS is too short. + (cpp_interpret_charconst): Long charconsts issue a warning + not an error. + 2001-05-23 Richard Henderson * doc/install.texi (alpha-linux): Require binutils 2.11. diff --git a/gcc/cpp.texi b/gcc/cpp.texi index ba50a260c53..a5dc889ae92 100644 --- a/gcc/cpp.texi +++ b/gcc/cpp.texi @@ -2889,22 +2889,17 @@ same column as it did in the original source file. @item The numeric value of character constants in preprocessor expressions. -The preprocessor interprets character constants in preprocessing -directives on the host machine. Expressions outside preprocessing -directives are compiled to be interpreted on the target machine. In the -normal case of a native compiler, these two environments are the same -and so character constants will be evaluated identically in both cases. -However, in the case of a cross compiler, the values may be different. 
+The preprocessor and compiler interpret character constants in the same +way; escape sequences such as @code{\a} are given the values they would +have on the target machine. Multi-character character constants are interpreted a character at a time, shifting the previous result left by the number of bits per character on the host, and adding the new character. For example, 'ab' on an 8-bit host would be interpreted as 'a' * 256 + 'b'. If there are more characters in the constant than can fit in the widest native -integer type on the host, usually a @samp{long}, the behavior is -undefined. - -Evaluation of wide character constants is not properly implemented yet. +integer type on the host, usually a @samp{long}, the excess characters +are ignored and a diagnostic is given. @item Source file inclusion. diff --git a/gcc/cpplex.c b/gcc/cpplex.c index d2617494edd..3185accece0 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -1706,46 +1706,48 @@ maybe_read_ucs (pfile, pstr, limit, pc) if (CPP_WTRADITIONAL (pfile)) cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c); - - for (length = (c == 'u' ? 4: 8); length; --length) - { - if (p >= limit) - { - cpp_error (pfile, "incomplete universal-character-name"); - break; - } - c = *p; - if (ISXDIGIT (c)) - { - code = (code << 4) + hex_digit_value (c); - p++; - } - else + length = (c == 'u' ? 4: 8); + + if ((size_t) (limit - p) < length) + { + cpp_error (pfile, "incomplete universal-character-name"); + /* Skip to the end to avoid more diagnostics. */ + p = limit; + } + else + { + for (; length; length--, p++) { - cpp_error (pfile, - "non-hex digit '%c' in universal-character-name", c); - break; + c = *p; + if (ISXDIGIT (c)) + code = (code << 4) + hex_digit_value (c); + else + { + cpp_error (pfile, + "non-hex digit '%c' in universal-character-name", c); + /* We shouldn't skip in case there are multibyte chars. 
*/ + break; + } } - } #ifdef TARGET_EBCDIC cpp_error (pfile, "universal-character-name on EBCDIC target"); code = 0x3f; /* EBCDIC invalid character */ #else - if (code > 0x9f && !(code & 0x80000000)) - ; /* True extended character, OK. */ - else if (code >= 0x20 && code < 0x7f) - { - /* ASCII printable character. The C character set consists of all of - these except $, @ and `. We use hex escapes so that this also - works with EBCDIC hosts. */ - if (code != 0x24 && code != 0x40 && code != 0x60) - cpp_error (pfile, "universal-character-name used for '%c'", code); - } - else - cpp_error (pfile, "invalid universal-character-name"); + /* True extended characters are OK. */ + if (code >= 0xa0 + && !(code & 0x80000000) + && !(code >= 0xD800 && code <= 0xDFFF)) + ; + /* The standard permits $, @ and ` to be specified as UCNs. We use + hex escapes so that this also works with EBCDIC hosts. */ + else if (code == 0x24 || code == 0x40 || code == 0x60) + ; + /* Don't give another error if one occurred above. */ + else if (length == 0) + cpp_error (pfile, "universal-character-name out of range"); #endif *pstr = p; @@ -1970,7 +1972,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen) else if (chars_seen > max_chars) { chars_seen = max_chars; - cpp_error (pfile, "character constant too long"); + cpp_warning (pfile, "character constant too long"); } else if (chars_seen > 1 && !traditional && warn_multi) cpp_warning (pfile, "multi-character character constant"); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f27895ac250..826918ea64e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2001-05-23 Neil Booth + + * gcc.dg/cpp/charconst.c: New tests. + * gcc.dg/cpp/escape.c: New tests. + * gcc.dg/cpp/escape-1.c: New tests. + * gcc.dg/cpp/escape-2.c: New tests. + * gcc.dg/cpp/ucs.c: New tests. 
+ 2001-05-23 David.Billinghurst * gcc.misc-tests/linkage.exp: Pass appropriate flags to diff --git a/gcc/testsuite/gcc.dg/cpp/charconst.c b/gcc/testsuite/gcc.dg/cpp/charconst.c new file mode 100644 index 00000000000..d8a178b7a9f --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/charconst.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. */ + +/* { dg-do compile } */ + +/* This tests various diagnostics about character constants, for both + the preprocessor and the compiler. + + Neil Booth, 22 May 2001. */ + +#if '' /* { dg-warning "empty" "empty charconst" } */ +#endif +#if L'' /* { dg-warning "empty" "empty wide charconst" } */ +#endif +#if 'very long' /* { dg-warning "too long" "long charconst" } */ +#endif +#if L'very long' /* { dg-warning "too long" "long wide charconst" } */ +#endif +/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */ +#if 'ab' /* { dg-warning "multi-char" "multi-character" } */ +#endif + +void foo () +{ + int c = ''; /* { dg-warning "empty" "empty charconst" } */ + c = L''; /* { dg-warning "empty" "empty wide charconst" } */ + + c = 'very long'; /* { dg-warning "too long" "long charconst" } */ + c = L'very long'; /* { dg-warning "too long" "long wide charconst" } */ + + /* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */ + c = 'ab'; /* { dg-warning "multi-char" "multi-character" } */ + +} diff --git a/gcc/testsuite/gcc.dg/cpp/escape-1.c b/gcc/testsuite/gcc.dg/cpp/escape-1.c new file mode 100644 index 00000000000..52d7414f3ae --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/escape-1.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. */ + +/* { dg-do compile } */ + +/* This tests various diagnostics about escape sequences, for both + the preprocessor and the compiler. + + Neil Booth, 22 May 2001. 
*/ + +#if '\x' /* { dg-error "no following" "\x with no digits" } */ +#endif +#if '\x400' /* { dg-warning "out of range" "\x out of range" } */ +#endif +#if '\x0ff' /* { dg-bogus "out of range" "\x out of range" } */ +#endif +#if '\400' /* { dg-warning "out of range" "\x out of range" } */ +#endif +#if '\377' /* { dg-bogus "out of range" "bogus \x out of range" } */ +#endif +#if '\177' != 0x7f /* { dg-bogus "out of range" "bogus \x out of range" } */ +#error bad octal /* { dg-bogus "bad" "bad octal evaluation" } */ +#endif +#if '\0377' /* { dg-warning "multi" "too long octal" } */ +#endif +#if '\p' /* { dg-error "unknown escape" "unknown escape seq" } */ +#endif + +void foo () +{ + int c; + + c = '\x'; /* { dg-error "no following" "\x with no digits" } */ + c = '\x100'; /* { dg-warning "out of range" "\x out of range" } */ + c = '\x0ff'; /* { dg-bogus "out of range" "\x out of range" } */ + c = '\400'; /* { dg-warning "out of range" "\x out of range" } */ + c = '\377'; /* { dg-bogus "out of range" "bogus \x out of range" } */ + c = '\0377'; /* { dg-warning "multi" "too long octal" } */ + c = '\p'; /* { dg-error "unknown escape" "unknown escape seq" } */ +} diff --git a/gcc/testsuite/gcc.dg/cpp/escape-2.c b/gcc/testsuite/gcc.dg/cpp/escape-2.c new file mode 100644 index 00000000000..31bf882c721 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/escape-2.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. */ + +/* { dg-do compile } */ +/* { dg-options "-pedantic -std=c99 -fno-show-column" } */ + +/* This tests various diagnostics with -pedantic about escape + sequences, for both the preprocessor and the compiler. + + Neil Booth, 22 May 2001. 
*/ + +#if '\e' /* { dg-warning "non-ISO" "non-ISO \\e" } */ +#endif +#if '\u00a0' /* { dg-bogus "unknown" "\\u is known in C99" } */ +#endif + +void foo () +{ + int c = '\E'; /* { dg-warning "non-ISO" "non-ISO \\E" } */ + c = '\u00a0'; /* { dg-bogus "unknown" "\\u is known in C99" } */ +} diff --git a/gcc/testsuite/gcc.dg/cpp/escape.c b/gcc/testsuite/gcc.dg/cpp/escape.c new file mode 100644 index 00000000000..44ad4c1d2bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/escape.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. */ + +/* { dg-do compile } */ +/* { dg-options "-Wtraditional -std=c89 -fno-show-column" } */ + +/* This tests various diagnostics with -Wtraditional about escape + sequences, for both the preprocessor and the compiler. + + Neil Booth, 22 May 2001. */ + +#if '\a' /* { dg-warning "traditional" "traditional bell" } */ +#endif +#if '\x1a' != 26 /* { dg-warning "traditional" "traditional hex" } */ + #error bad hex /* { dg-bogus "bad" "bad hexadecimal evaluation" } */ +#endif +#if '\u' /* { dg-warning "unknown" "\u is unknown in C89" } */ +#endif + +void foo () +{ + int c = '\a'; /* { dg-warning "traditional" "traditional bell" } */ + + c = '\xa1'; /* { dg-warning "traditional" "traditional hex" } */ + c = '\u'; /* { dg-warning "unknown" "\u is unknown in C89" } */ +} diff --git a/gcc/testsuite/gcc.dg/cpp/ucs.c b/gcc/testsuite/gcc.dg/cpp/ucs.c new file mode 100644 index 00000000000..d131b0b5b93 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucs.c @@ -0,0 +1,35 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. */ + +/* { dg-do compile } */ +/* { dg-options "-std=c99" } */ + +/* This tests universal character sequences. + + Neil Booth, 22 May 2001. 
*/ + +#if L'\u1234' != 0x1234 +#error bad short ucs /* { dg-bogus "bad" "bad \u1234 evaluation" } */ +#endif +#if L'\U1234abcd' != 0x1234abcd +#error bad long ucs /* { dg-bogus "bad" "bad \U1234abcd evaluation" } */ +#endif + +void foo () +{ + int c; + + c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */ + c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */ + + c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */ + /* { dg-warning "too long" "" { target *-*-* } 24 } */ + + c = '\u0024'; /* { dg-bogus "invalid" "0024 is a valid UCN" } */ + c = "\u0040"[0]; /* { dg-bogus "invalid" "0040 is a valid UCN" } */ + c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */ + c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */ + + c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */ + c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */ + c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */ +}