+2014-11-06 Joseph Myers <joseph@codesourcery.com>
+
+ * doc/invoke.texi (-std=c99, -std=c11): Don't refer to corner
+ cases of extended identifiers.
+
2014-11-06 Eric Botcazou <ebotcazou@adacore.com>
* tree-cfgcleanup.c (fixup_noreturn_call): Do not perform DCE here.
@itemx iso9899:1999
@itemx iso9899:199x
ISO C99. This standard is substantially completely supported, modulo
-bugs, corner cases of extended identifiers and floating-point issues
+bugs and floating-point issues
(mainly but not entirely relating to optional C99 features from
Annexes F and G). See
@w{@uref{http://gcc.gnu.org/c99status.html}} for more information. The
@itemx c1x
@itemx iso9899:2011
ISO C11, the 2011 revision of the ISO C standard. This standard is
-substantially completely supported, modulo bugs, corner cases of
-extended identifiers, floating-point issues
+substantially completely supported, modulo bugs, floating-point issues
(mainly but not entirely relating to optional C11 features from
Annexes F and G) and the optional Annexes K (Bounds-checking
interfaces) and L (Analyzability). The name @samp{c1x} is deprecated.
+2014-11-06 Joseph Myers <joseph@codesourcery.com>
+
+ * g++.dg/cpp/ucnid-2.C, g++.dg/cpp/ucnid-3.C,
+ gcc.dg/cpp/ucnid-11.c, gcc.dg/cpp/ucnid-12.c,
+ gcc.dg/cpp/ucnid-13.c, gcc.dg/cpp/ucnid-14.c,
+ gcc.dg/cpp/ucnid-15.c: New tests.
+
2014-11-06 Eric Botcazou <ebotcazou@adacore.com>
* gnat.dg/opt43.adb: New test.
--- /dev/null
+/* Test stringization of identifiers with UCNs preserves spelling. */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+
+#define h(s) #s
+#define str(s) h(s)
+
+int
+main ()
+{
+ if (strcmp (str (str (\u00c1)), "\"\\u00c1\""))
+ abort ();
+ if (strcmp (str (str (\u00C1)), "\"\\u00C1\""))
+ abort ();
+}
--- /dev/null
+/* Test pasting of identifiers with UCNs preserves spelling. */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+
+#define c(s1, s2) s1 ## s2
+#define h(s) #s
+#define str(s) h(s)
+
+int
+main ()
+{
+ if (strcmp (str (str (c (\u00c1, \u00C1))), "\"\\u00c1\\u00C1\""))
+ abort ();
+}
--- /dev/null
+/* Test spelling differences in UCNs are properly diagnosed for macro
+ redefinitions. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -pedantic-errors" } */
+
+/* Different spelling of UCN in expansion. */
+#define m1 \u00c1 /* { dg-message "previous definition" } */
+#define m1 \u00C1 /* { dg-error "redefined" } */
+
+#define m1ok \u00c1
+#define m1ok \u00c1
+
+/* Different spelling of UCN in argument name. */
+#define m2(\u00c1) /* { dg-message "previous definition" } */
+#define m2(\u00C1) /* { dg-error "redefined" } */
+
+#define m2ok(\u00c1)
+#define m2ok(\u00c1)
+
+/* Same spelling in argument name but different spelling when used in
+ expansion. */
+#define m3(\u00c1) \u00c1 /* { dg-message "previous definition" } */
+#define m3(\u00c1) \u00C1 /* { dg-error "redefined" } */
+
+#define m3ok(\u00c1) \u00C1
+#define m3ok(\u00c1) \u00C1
+
+/* Different spelling of the macro name itself is OK. */
+#define m4ok\u00c1
+#define m4ok\u00C1
--- /dev/null
+/* Test spelling differences in UCNs in macro definitions still count
+ as the same identifier for macro expansion. */
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -pedantic-errors" } */
+
+#define m1\u00c1
+#ifndef m1\u00C1
+#error not defined
+#endif
+
+#define m2(\u00c1) \u00C1
+
+int i = m2 (0);
--- /dev/null
+/* Verify macros named with UCNs are output in -dD output with UCNs,
+ not UTF-8. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-13.i "\\\\U000000c1" } } */
+#define \u00c1 1
--- /dev/null
+/* Verify macro definitions with UCNs are output in -dD output with
+ the original spelling. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-14.i "\\\\u00c1" } } */
+#define a \u00c1
--- /dev/null
+/* Verify macro definitions with UCNs in argument names are output in
+ -dD output with the original spelling. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-15.i "#define a\\(\\\\u00c1\\) x:\\\\u00C1:y:\\\\u00c1:z" } } */
+#define a(\u00c1) x:\u00C1:y:\u00c1:z
+2014-11-06 Joseph Myers <joseph@codesourcery.com>
+
+ * include/cpp-id-data.h (struct cpp_macro): Update comment
+ regarding parameters.
+ * include/cpplib.h (struct cpp_macro_arg, struct cpp_identifier):
+ Add spelling fields.
+ (struct cpp_token): Update comment on macro_arg.
+ * internal.h (_cpp_save_parameter): Add extra argument.
+ (_cpp_spell_ident_ucns): New declaration.
+ * lex.c (lex_identifier): Add SPELLING argument. Set *SPELLING to
+ original spelling of identifier.
+ (_cpp_lex_direct): Update calls to lex_identifier.
+ (_cpp_spell_ident_ucns): New function, factored out of
+ cpp_spell_token.
+ (cpp_spell_token): Adjust FORSTRING argument semantics to return
+ original spelling of identifiers. Use _cpp_spell_ident_ucns in
+ !FORSTRING case.
+ (_cpp_equiv_tokens): Check spellings of identifiers and macro
+ arguments are identical.
+ * macro.c (macro_arg_saved_data): New structure.
+ (paste_tokens): Use original spellings of identifiers from
+ cpp_spell_token.
+ (_cpp_save_parameter): Add argument SPELLING. Save both canonical
+ node and its value.
+ (parse_params): Update calls to _cpp_save_parameter.
+ (lex_expansion_token): Save spelling of macro argument tokens.
+ (_cpp_create_definition): Extract canonical node from saved data.
+ (cpp_macro_definition): Use UCNs in spelling of macro name. Use
+ original spellings of macro argument tokens and identifiers.
+ * traditional.c (scan_parameters): Update call to
+ _cpp_save_parameter.
+
2014-11-05 Joseph Myers <joseph@codesourcery.com>
PR preprocessor/9449
/* Each macro definition is recorded in a cpp_macro structure.
Variadic macros cannot occur with traditional cpp. */
struct GTY(()) cpp_macro {
- /* Parameters, if any. */
+ /* Parameters, if any. If parameter names use extended identifiers,
+ the original spelling of those identifiers, not the canonical
+ UTF-8 spelling, goes here. */
cpp_hashnode ** GTY ((nested_ptr (union tree_node,
"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"),
struct GTY(()) cpp_macro_arg {
/* Argument number. */
unsigned int arg_no;
+ /* The original spelling of the macro argument token. */
+ cpp_hashnode *
+ GTY ((nested_ptr (union tree_node,
+ "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
+ "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
+ spelling;
};
/* An identifier in the cpp_token union. */
"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
node;
+ /* The original spelling of the identifier. */
+ cpp_hashnode *
+ GTY ((nested_ptr (union tree_node,
+ "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
+ "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
+ spelling;
};
/* A preprocessing token. This has been carefully packed and should
/* A string, or number. */
struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR"))) str;
- /* Argument no. for a CPP_MACRO_ARG. */
+ /* Argument no. (and original spelling) for a CPP_MACRO_ARG. */
struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO"))) macro_arg;
/* Original token no. for a CPP_PASTE (from a sequence of
extern void _cpp_pop_context (cpp_reader *);
extern void _cpp_push_text_context (cpp_reader *, cpp_hashnode *,
const unsigned char *, size_t);
-extern bool _cpp_save_parameter (cpp_reader *, cpp_macro *, cpp_hashnode *);
+extern bool _cpp_save_parameter (cpp_reader *, cpp_macro *, cpp_hashnode *,
+ cpp_hashnode *);
extern bool _cpp_arguments_ok (cpp_reader *, cpp_macro *, const cpp_hashnode *,
unsigned int);
extern const unsigned char *_cpp_builtin_macro_text (cpp_reader *,
extern cpp_token *_cpp_temp_token (cpp_reader *);
extern const cpp_token *_cpp_lex_token (cpp_reader *);
extern cpp_token *_cpp_lex_direct (cpp_reader *);
+extern unsigned char *_cpp_spell_ident_ucns (unsigned char *, cpp_hashnode *);
extern int _cpp_equiv_tokens (const cpp_token *, const cpp_token *);
extern void _cpp_init_tokenrun (tokenrun *, unsigned int);
extern cpp_hashnode *_cpp_lex_identifier (cpp_reader *, const char *);
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
- struct normalize_state *nst)
+ struct normalize_state *nst, cpp_hashnode **spelling)
{
cpp_hashnode *result;
const uchar *cur;
} while (forms_identifier_p (pfile, false, nst));
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
+ *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
}
else
{
result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC));
+ *spelling = result;
}
/* Rarely, identifiers require diagnostics when lexed. */
{
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
- &nst);
+ &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
}
if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
- result->val.node.node = lex_identifier (pfile, base, true, &nst);
+ result->val.node.node = lex_identifier (pfile, base, true, &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
break;
}
return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
}
+/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
+ Bytes of the identifier's name without the high bit set are copied
+ unchanged; a byte with the high bit set is handed to utf8_to_ucn,
+ after which the output pointer is advanced by 10 bytes. The
+ buffer must already contain enough space to hold the identifier's
+ spelling. Returns a pointer to the character after the last
+ character written. */
+unsigned char *
+_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
+{
+ size_t i;
+ const unsigned char *name = NODE_NAME (ident);
+
+ for (i = 0; i < NODE_LEN (ident); i++)
+ if (name[i] & ~0x7F)
+ {
+ i += utf8_to_ucn (buffer, name + i) - 1;
+ buffer += 10;
+ }
+ else
+ *buffer++ = name[i];
+
+ return buffer;
+}
+}
+
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
already contain the enough space to hold the token's spelling.
Returns a pointer to the character after the last character written.
FORSTRING is true if this is to be the spelling after translation
- phase 1 (this is different for UCNs).
+ phase 1 (with the original spelling of extended identifiers), false
+ if extended identifiers should always be written using UCNs (there is
+ no option for always writing them in the internal UTF-8 form).
FIXME: Would be nice if we didn't need the PFILE argument. */
unsigned char *
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
case SPELL_IDENT:
if (forstring)
{
- memcpy (buffer, NODE_NAME (token->val.node.node),
- NODE_LEN (token->val.node.node));
- buffer += NODE_LEN (token->val.node.node);
+ memcpy (buffer, NODE_NAME (token->val.node.spelling),
+ NODE_LEN (token->val.node.spelling));
+ buffer += NODE_LEN (token->val.node.spelling);
}
else
- {
- size_t i;
- const unsigned char * name = NODE_NAME (token->val.node.node);
-
- for (i = 0; i < NODE_LEN (token->val.node.node); i++)
- if (name[i] & ~0x7F)
- {
- i += utf8_to_ucn (buffer, name + i) - 1;
- buffer += 10;
- }
- else
- *buffer++ = NODE_NAME (token->val.node.node)[i];
- }
+ buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
break;
case SPELL_LITERAL:
return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
case SPELL_NONE:
return (a->type != CPP_MACRO_ARG
- || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
+ || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
+ && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
case SPELL_IDENT:
- return a->val.node.node == b->val.node.node;
+ return (a->val.node.node == b->val.node.node
+ && a->val.node.spelling == b->val.node.spelling);
case SPELL_LITERAL:
return (a->val.str.len == b->val.str.len
&& !memcmp (a->val.str.text, b->val.str.text,
#endif
};
+/* Saved data about an identifier being used as a macro argument
+ name. */
+struct macro_arg_saved_data {
+ /* The canonical (UTF-8) spelling of this identifier. */
+ cpp_hashnode *canonical_node;
+ /* The previous value of this identifier. */
+ union _cpp_hashnode_value value;
+};
+
/* Macro expansion. */
static int enter_macro_context (cpp_reader *, cpp_hashnode *,
len = cpp_token_len (*plhs) + cpp_token_len (rhs) + 1;
buf = (unsigned char *) alloca (len);
- end = lhsend = cpp_spell_token (pfile, *plhs, buf, false);
+ end = lhsend = cpp_spell_token (pfile, *plhs, buf, true);
/* Avoid comment headers, since they are still processed in stage 3.
It is simpler to insert a space here, rather than modifying the
*end++ = ' ';
/* In one obscure case we might see padding here. */
if (rhs->type != CPP_PADDING)
- end = cpp_spell_token (pfile, rhs, end, false);
+ end = cpp_spell_token (pfile, rhs, end, true);
*end = '\n';
cpp_push_buffer (pfile, buf, end - buf, /* from_stage3 */ true);
h->flags &= ~(NODE_BUILTIN | NODE_DISABLED | NODE_USED);
}
-/* Save parameter NODE to the parameter list of macro MACRO. Returns
- zero on success, nonzero if the parameter is a duplicate. */
+/* Save parameter NODE (spelling SPELLING) to the parameter list of
+ macro MACRO. Returns zero on success, nonzero if the parameter is
+ a duplicate. */
bool
-_cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node)
+_cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node,
+ cpp_hashnode *spelling)
{
unsigned int len;
/* Constraint 6.10.3.6 - duplicate parameter names. */
< (macro->paramc + 1) * sizeof (cpp_hashnode *))
_cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_hashnode *));
- ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = node;
+ ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = spelling;
node->flags |= NODE_MACRO_ARG;
- len = macro->paramc * sizeof (union _cpp_hashnode_value);
+ len = macro->paramc * sizeof (struct macro_arg_saved_data);
if (len > pfile->macro_buffer_len)
{
pfile->macro_buffer = XRESIZEVEC (unsigned char, pfile->macro_buffer,
len);
pfile->macro_buffer_len = len;
}
- ((union _cpp_hashnode_value *) pfile->macro_buffer)[macro->paramc - 1]
- = node->value;
+ struct macro_arg_saved_data save;
+ save.value = node->value;
+ save.canonical_node = node;
+ ((struct macro_arg_saved_data *) pfile->macro_buffer)[macro->paramc - 1]
+ = save;
node->value.arg_index = macro->paramc;
return false;
}
prev_ident = 1;
- if (_cpp_save_parameter (pfile, macro, token->val.node.node))
+ if (_cpp_save_parameter (pfile, macro, token->val.node.node,
+ token->val.node.spelling))
return false;
continue;
if (!prev_ident)
{
_cpp_save_parameter (pfile, macro,
+ pfile->spec_nodes.n__VA_ARGS__,
pfile->spec_nodes.n__VA_ARGS__);
pfile->state.va_args_ok = 1;
if (! CPP_OPTION (pfile, c99)
if (token->type == CPP_NAME
&& (token->val.node.node->flags & NODE_MACRO_ARG) != 0)
{
+ cpp_hashnode *spelling = token->val.node.spelling;
token->type = CPP_MACRO_ARG;
token->val.macro_arg.arg_no = token->val.node.node->value.arg_index;
+ token->val.macro_arg.spelling = spelling;
}
else if (CPP_WTRADITIONAL (pfile) && macro->paramc > 0
&& (token->type == CPP_STRING || token->type == CPP_CHAR))
/* Clear the fast argument lookup indices. */
for (i = macro->paramc; i-- > 0; )
{
- struct cpp_hashnode *node = macro->params[i];
+ struct macro_arg_saved_data *save =
+ &((struct macro_arg_saved_data *) pfile->macro_buffer)[i];
+ struct cpp_hashnode *node = save->canonical_node;
node->flags &= ~ NODE_MACRO_ARG;
- node->value = ((union _cpp_hashnode_value *) pfile->macro_buffer)[i];
+ node->value = save->value;
}
if (!ok)
macro = node->value.macro;
/* Calculate length. */
- len = NODE_LEN (node) + 2; /* ' ' and NUL. */
+ len = NODE_LEN (node) * 10 + 2; /* ' ' and NUL. */
if (macro->fun_like)
{
len += 4; /* "()" plus possible final ".." of named
cpp_token *token = &macro->exp.tokens[i];
if (token->type == CPP_MACRO_ARG)
- len += NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1]);
+ len += NODE_LEN (token->val.macro_arg.spelling);
else
len += cpp_token_len (token);
/* Fill in the buffer. Start with the macro name. */
buffer = pfile->macro_buffer;
- memcpy (buffer, NODE_NAME (node), NODE_LEN (node));
- buffer += NODE_LEN (node);
+ buffer = _cpp_spell_ident_ucns (buffer, node);
/* Parameter names. */
if (macro->fun_like)
if (token->type == CPP_MACRO_ARG)
{
memcpy (buffer,
- NODE_NAME (macro->params[token->val.macro_arg.arg_no - 1]),
- NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1]));
- buffer += NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1]);
+ NODE_NAME (token->val.macro_arg.spelling),
+ NODE_LEN (token->val.macro_arg.spelling));
+ buffer += NODE_LEN (token->val.macro_arg.spelling);
}
else
- buffer = cpp_spell_token (pfile, token, buffer, false);
+ buffer = cpp_spell_token (pfile, token, buffer, true);
if (token->flags & PASTE_LEFT)
{
if (is_idstart (*cur))
{
+ struct cpp_hashnode *id = lex_identifier (pfile, cur);
ok = false;
- if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
+ if (_cpp_save_parameter (pfile, macro, id, id))
break;
cur = skip_whitespace (pfile, CUR (pfile->context),
true /* skip_comments */);