* cpp.texi: Update for handling of charconsts.
* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update
diagnostics. Skip to the end if the UCS is too short.
(cpp_interpret_charconst): Long charconsts issue a warning
not an error.
* gcc.dg/cpp/charconst.c: New tests.
* gcc.dg/cpp/escape.c: New tests.
* gcc.dg/cpp/escape-1.c: New tests.
* gcc.dg/cpp/escape-2.c: New tests.
* gcc.dg/cpp/ucs.c: New tests.
From-SVN: r42514
+2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
+
+ * cpp.texi: Update for handling of charconsts.
+ * cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update
+ diagnostics. Skip to the end if the UCS is too short.
+ (cpp_interpret_charconst): Long charconsts issue a warning
+ not an error.
+
2001-05-23 Richard Henderson <rth@redhat.com>
* doc/install.texi (alpha-linux): Require binutils 2.11.
@item The numeric value of character constants in preprocessor expressions.
-The preprocessor interprets character constants in preprocessing
-directives on the host machine. Expressions outside preprocessing
-directives are compiled to be interpreted on the target machine. In the
-normal case of a native compiler, these two environments are the same
-and so character constants will be evaluated identically in both cases.
-However, in the case of a cross compiler, the values may be different.
+The preprocessor and compiler interpret character constants in the same
+way; escape sequences such as @code{\a} are given the values they would
+have on the target machine.
Multi-character character constants are interpreted a character at a
time, shifting the previous result left by the number of bits per
character on the host, and adding the new character. For example, 'ab'
on an 8-bit host would be interpreted as 'a' * 256 + 'b'. If there are
more characters in the constant than can fit in the widest native
-integer type on the host, usually a @samp{long}, the behavior is
-undefined.
-
-Evaluation of wide character constants is not properly implemented yet.
+integer type on the host, usually a @samp{long}, the excess characters
+are ignored and a diagnostic is given.
@item Source file inclusion.
if (CPP_WTRADITIONAL (pfile))
cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
-
- for (length = (c == 'u' ? 4: 8); length; --length)
- {
- if (p >= limit)
- {
- cpp_error (pfile, "incomplete universal-character-name");
- break;
- }
- c = *p;
- if (ISXDIGIT (c))
- {
- code = (code << 4) + hex_digit_value (c);
- p++;
- }
- else
+ length = (c == 'u' ? 4: 8);
+
+ if ((size_t) (limit - p) < length)
+ {
+ cpp_error (pfile, "incomplete universal-character-name");
+ /* Skip to the end to avoid more diagnostics. */
+ p = limit;
+ }
+ else
+ {
+ for (; length; length--, p++)
{
- cpp_error (pfile,
- "non-hex digit '%c' in universal-character-name", c);
- break;
+ c = *p;
+ if (ISXDIGIT (c))
+ code = (code << 4) + hex_digit_value (c);
+ else
+ {
+ cpp_error (pfile,
+ "non-hex digit '%c' in universal-character-name", c);
+ /* We shouldn't skip in case there are multibyte chars. */
+ break;
+ }
}
-
}
#ifdef TARGET_EBCDIC
cpp_error (pfile, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
#else
- if (code > 0x9f && !(code & 0x80000000))
- ; /* True extended character, OK. */
- else if (code >= 0x20 && code < 0x7f)
- {
- /* ASCII printable character. The C character set consists of all of
- these except $, @ and `. We use hex escapes so that this also
- works with EBCDIC hosts. */
- if (code != 0x24 && code != 0x40 && code != 0x60)
- cpp_error (pfile, "universal-character-name used for '%c'", code);
- }
- else
- cpp_error (pfile, "invalid universal-character-name");
+ /* True extended characters are OK. */
+ if (code >= 0xa0
+ && !(code & 0x80000000)
+ && !(code >= 0xD800 && code <= 0xDFFF))
+ ;
+ /* The standard permits $, @ and ` to be specified as UCNs. We use
+ hex escapes so that this also works with EBCDIC hosts. */
+ else if (code == 0x24 || code == 0x40 || code == 0x60)
+ ;
+ /* Don't give another error if one occurred above. */
+ else if (length == 0)
+ cpp_error (pfile, "universal-character-name out of range");
#endif
*pstr = p;
else if (chars_seen > max_chars)
{
chars_seen = max_chars;
- cpp_error (pfile, "character constant too long");
+ cpp_warning (pfile, "character constant too long");
}
else if (chars_seen > 1 && !traditional && warn_multi)
cpp_warning (pfile, "multi-character character constant");
+2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
+
+ * gcc.dg/cpp/charconst.c: New tests.
+ * gcc.dg/cpp/escape.c: New tests.
+ * gcc.dg/cpp/escape-1.c: New tests.
+ * gcc.dg/cpp/escape-2.c: New tests.
+ * gcc.dg/cpp/ucs.c: New tests.
+
2001-05-23 David.Billinghurst <David.Billinghurst@riotinto.com>
* gcc.misc-tests/linkage.exp: Pass appropriate flags to
--- /dev/null
+/* Copyright (C) 2001 Free Software Foundation, Inc. */
+
+/* { dg-do compile } */
+
+/* This tests various diagnostics about character constants, for both
+ the preprocessor and the compiler.
+
+ Neil Booth, 22 May 2001. */
+
+#if '' /* { dg-warning "empty" "empty charconst" } */
+#endif
+#if L'' /* { dg-warning "empty" "empty wide charconst" } */
+#endif
+#if 'very long' /* { dg-warning "too long" "long charconst" } */
+#endif
+#if L'very long' /* { dg-warning "too long" "long wide charconst" } */
+#endif
+/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
+#if 'ab' /* { dg-warning "multi-char" "multi-character" } */
+#endif
+
+void foo ()
+{
+ int c = ''; /* { dg-warning "empty" "empty charconst" } */
+ c = L''; /* { dg-warning "empty" "empty wide charconst" } */
+
+ c = 'very long'; /* { dg-warning "too long" "long charconst" } */
+ c = L'very long'; /* { dg-warning "too long" "long wide charconst" } */
+
+ /* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
+ c = 'ab'; /* { dg-warning "multi-char" "multi-character" } */
+
+}
--- /dev/null
+/* Copyright (C) 2001 Free Software Foundation, Inc. */
+
+/* { dg-do compile } */
+
+/* This tests various diagnostics about escape sequences, for both
+ the preprocessor and the compiler.
+
+ Neil Booth, 22 May 2001. */
+
+#if '\x' /* { dg-error "no following" "\x with no digits" } */
+#endif
+#if '\x400' /* { dg-warning "out of range" "\x out of range" } */
+#endif
+#if '\x0ff' /* { dg-bogus "out of range" "\x out of range" } */
+#endif
+#if '\400' /* { dg-warning "out of range" "\x out of range" } */
+#endif
+#if '\377' /* { dg-bogus "out of range" "bogus \x out of range" } */
+#endif
+#if '\177' != 0x7f /* { dg-bogus "out of range" "bogus \x out of range" } */
+#error bad octal /* { dg-bogus "bad" "bad octal evaluation" } */
+#endif
+#if '\0377' /* { dg-warning "multi" "too long octal" } */
+#endif
+#if '\p' /* { dg-error "unknown escape" "unknown escape seq" } */
+#endif
+
+void foo ()
+{
+ int c;
+
+ c = '\x'; /* { dg-error "no following" "\x with no digits" } */
+ c = '\x100'; /* { dg-warning "out of range" "\x out of range" } */
+ c = '\x0ff'; /* { dg-bogus "out of range" "\x out of range" } */
+ c = '\400'; /* { dg-warning "out of range" "\x out of range" } */
+ c = '\377'; /* { dg-bogus "out of range" "bogus \x out of range" } */
+ c = '\0377'; /* { dg-warning "multi" "too long octal" } */
+ c = '\p'; /* { dg-error "unknown escape" "unknown escape seq" } */
+}
--- /dev/null
+/* Copyright (C) 2001 Free Software Foundation, Inc. */
+
+/* { dg-do compile } */
+/* { dg-options "-pedantic -std=c99 -fno-show-column" } */
+
+/* This tests various diagnostics with -pedantic about escape
+ sequences, for both the preprocessor and the compiler.
+
+ Neil Booth, 22 May 2001. */
+
+#if '\e' /* { dg-warning "non-ISO" "non-ISO \\e" } */
+#endif
+#if '\u00a0' /* { dg-bogus "unknown" "\\u is known in C99" } */
+#endif
+
+void foo ()
+{
+ int c = '\E'; /* { dg-warning "non-ISO" "non-ISO \\E" } */
+ c = '\u00a0'; /* { dg-bogus "unknown" "\\u is known in C99" } */
+}
--- /dev/null
+/* Copyright (C) 2001 Free Software Foundation, Inc. */
+
+/* { dg-do compile } */
+/* { dg-options "-Wtraditional -std=c89 -fno-show-column" } */
+
+/* This tests various diagnostics with -Wtraditional about escape
+ sequences, for both the preprocessor and the compiler.
+
+ Neil Booth, 22 May 2001. */
+
+#if '\a' /* { dg-warning "traditional" "traditional bell" } */
+#endif
+#if '\x1a' != 26 /* { dg-warning "traditional" "traditional hex" } */
+ #error bad hex /* { dg-bogus "bad" "bad hexadecimal evaluation" } */
+#endif
+#if '\u' /* { dg-warning "unknown" "\u is unknown in C89" } */
+#endif
+
+void foo ()
+{
+ int c = '\a'; /* { dg-warning "traditional" "traditional bell" } */
+
+ c = '\xa1'; /* { dg-warning "traditional" "traditional hex" } */
+ c = '\u'; /* { dg-warning "unknown" "\u is unknown in C89" } */
+}
--- /dev/null
+/* Copyright (C) 2001 Free Software Foundation, Inc. */
+
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+
+/* This tests universal character sequences.
+
+ Neil Booth, 22 May 2001. */
+
+#if L'\u1234' != 0x1234
+#error bad short ucs /* { dg-bogus "bad" "bad \u1234 evaluation" } */
+#endif
+#if L'\U1234abcd' != 0x1234abcd
+#error bad long ucs /* { dg-bogus "bad" "bad \U1234abcd evaluation" } */
+#endif
+
+void foo ()
+{
+ int c;
+
+ c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
+ c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
+
+ c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */
+ /* { dg-warning "too long" "" { target *-*-* } 24 } */
+
+ c = '\u0024'; /* { dg-bogus "invalid" "0024 is a valid UCN" } */
+ c = "\u0040"[0]; /* { dg-bogus "invalid" "0040 is a valid UCN" } */
+ c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
+ c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
+
+ c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */
+ c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */
+ c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */
+}