+2004-01-16 Eric Christopher <echristo@redhat.com>
+ Chandrakala Chavva <cchavva@redhat.com>
+
+ * cppcharset.c (one_iso88591_to_utf8): New function.
+	(convert_iso88591_utf8): Ditto.  Use one_iso88591_to_utf8.
+	(conversion_tab): Add ISO-8859-1/UTF-8 entry using
+	convert_iso88591_utf8.
+ (_cpp_input_to_utf8): New function.
+ (_cpp_init_iconv_buffer): Ditto.
+ (_cpp_close_iconv_buffer): Ditto.
+ * cpphash.h: Prototype new functions.
+ (cpp_buffer): Add input_cset_desc.
+ * cppinit.c: Add input_charset default.
+ * cpplib.c (cpp_push_buffer): Support init and
+ close of iconv.
+ * cpplib.h (cpp_options): Add input_charset.
+
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
* fixinc/tests/base/sys/stat.h: Adapt for new hackname.
* fixinc/inclhack.def (alpha___extern_prefix,
- alpha___extern_prefix_standards): New hacks to obey
+ alpha___extern_prefix_standards): New hacks to obey
__PRAGMA_EXTERN_PREFIX.
* fixinc/tests/base/testing.h [ALPHA___EXTERN_PREFIX_CHECK]: New
test.
* fixinc/tests/base/standards.h: Likewise.
-
+
* fixincl/inclhack.def (alpha_pthread): Tweak to match more
variations.
New testcase.
* fixinc/tests/base/pthread.h: Handle it.
-
+
* fixincl/inclhack.def (bad_lval): Sort file list.
Add many missing files up to Tru64 UNIX V5.1B.
* gcc/fixinc/tests/base/libgen.h: Renamed to ...
* gcc/fixinc/tests/base/dirent.h: ... this to match new file list
order.
-
+
* fixinc/fixincl.x: Regenerate.
2004-01-16 Mark Mitchell <mark@codesourcery.com>
{
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
+
cppchar_t c;
const uchar *inbuf = *inbufp;
size_t nbytes, i;
The return value is either 0 for success, or an errno value for
failure, which may be E2BIG (need more space), EILSEQ (ill-formed
input sequence), ir EINVAL (incomplete input sequence). */
-
+
static inline int
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
uchar **outbufp, size_t *outbytesleftp)
return 0;
}
+/* Convert the single ISO 8859-1 character at *INBUFP to UTF-8,
+   storing the result through *OUTBUFP.
+
+   The first 256 code points of ISO 8859-1 have the same numeric
+   values as the first 256 code points of Unicode, therefore the
+   incoming ISO 8859-1 byte can be passed directly to
+   one_cppchar_to_utf8 (which expects a Unicode value).
+
+   BIGEND is unused; it is present only so that this function has the
+   signature conversion_loop expects.
+
+   Returns 0 on success, after advancing *INBUFP and decrementing
+   *INBYTESLEFTP by one.  Returns EINVAL if no input bytes remain, or
+   the nonzero value returned by one_cppchar_to_utf8 if that call
+   fails; in both failure cases the input position is not advanced.  */
+
+static int
+one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
+                      size_t *inbytesleftp, uchar **outbufp,
+                      size_t *outbytesleftp)
+{
+  const uchar *inbuf = *inbufp;
+  int rval;
+
+  /* Each character is exactly one byte, so the only malformed case
+     is having nothing left to read.  Having more than one byte left
+     is the normal situation when converting a whole buffer and must
+     not be rejected.  */
+  if (*inbytesleftp < 1)
+    return EINVAL;
+
+  rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
+  if (rval)
+    return rval;
+
+  /* Consume the input byte only once the output has been written.  */
+  *inbufp += 1;
+  *inbytesleftp -= 1;
+
+  return 0;
+}
+
+
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
outbuf = to->text + to->asize - outbytesleft;
}
}
-
+
/* These functions convert entire strings between character sets.
They all have the signature
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
+/* Convert the FLEN bytes at FROM from ISO 8859-1 to UTF-8 into TO by
+   driving one_iso88591_to_utf8 through conversion_loop.  CD is handed
+   to conversion_loop unchanged.  Returns conversion_loop's result.  */
+static bool
+convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
+                       struct _cpp_strbuf *to)
+{
+  bool converted = conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
+
+  return converted;
+}
+
+
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
+ { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
struct cset_converter ret;
char *pair;
size_t i;
-
+
if (!strcasecmp (to, from))
{
ret.func = convert_no_conversion;
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
}
else
{
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
ret.func = convert_no_conversion;
*unsignedp = unsigned_p;
return result;
}
-
+
/* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for wide strings. STR is the string structure returned
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
return result;
}
+
+/* Convert the LENGTH bytes at INPUT with the input_cset_desc
+   converter of PFILE's current buffer.
+
+   On success, return a buffer obtained from xmalloc that holds the
+   converted text followed by a '\n'.  This function does not free
+   that buffer, so the caller owns it.  On conversion failure, report
+   an error through cpp_error, release the buffer and return NULL.  */
+
+uchar *
+_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+{
+  struct _cpp_strbuf tbuf;
+  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  size_t end;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
+    {
+      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
+      /* Do not leak the work buffer.  Free the pointer currently held
+         in TBUF, since the converter may have reallocated it.
+         NOTE(review): assumes converters leave TBUF.text valid when
+         they fail -- confirm against convert_using_iconv.  */
+      free (tbuf.text);
+      return NULL;
+    }
+
+  /* For empty input the newline goes at the very start of the
+     buffer; otherwise it follows the converted text.  */
+  end = length ? tbuf.len : 0;
+
+  /* The converted text may fill the buffer exactly, so make sure
+     there is room for the trailing newline before storing it.  */
+  if (end >= tbuf.asize)
+    {
+      tbuf.asize = end + 1;
+      tbuf.text = xrealloc (tbuf.text, tbuf.asize);
+    }
+  tbuf.text[end] = '\n';
+
+  return tbuf.text;
+}
+
+/* Set up the input character set converter of PFILE's current
+   buffer: store in its input_cset_desc the descriptor that
+   init_iconv_desc returns for converting from the character set
+   named FROM to SOURCE_CHARSET.  No detection of the file's actual
+   encoding is done here; FROM is used as given by the caller.  */
+
+void
+_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
+{
+  cpp_buffer *buf = pfile->buffer;
+
+  buf->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);
+}
+
+/* Release the iconv descriptor held by the input converter of
+   PFILE's current buffer.  Nothing is done unless HAVE_ICONV is
+   nonzero and the converter's function is convert_using_iconv.  */
+
+void
+_cpp_close_iconv_buffer (cpp_reader *pfile)
+{
+  if (!HAVE_ICONV)
+    return;
+
+  if (pfile->buffer->input_cset_desc.func != convert_using_iconv)
+    return;
+
+  iconv_close (pfile->buffer->input_cset_desc.cd);
+}
const uchar *cur; /* Current location. */
const uchar *line_base; /* Start of current physical line. */
const uchar *next_line; /* Start of to-be-cleaned logical line. */
-
+
const uchar *buf; /* Entire character buffer. */
const uchar *rlimit; /* Writable byte at end of file. */
/* Used for buffer overlays by cpptrad.c. */
const uchar *saved_cur, *saved_rlimit;
+
+ /* Descriptor for converting from the input character set to the
+ source character set. */
+ struct cset_converter input_cset_desc;
};
/* A cpp_reader encapsulates the "state" of a pre-processor run.
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
+extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
+extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
+extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, wide_charset) = 0;
+ /* Default the input character set to iso-8859-1 for now. */
+ CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
+
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
don't want to prepend anything at all to filenames using it. All
/* Undefine a single macro/assertion/whatever. */
static int
-undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
+undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
void *data_p ATTRIBUTE_UNUSED)
{
switch (h->type)
{
case NT_VOID:
break;
-
+
case NT_MACRO:
if (pfile->cb.undef)
(*pfile->cb.undef) (pfile, pfile->directive_line, h);
cpp_string s = { 0, 0 };
if (_cpp_interpret_string_notranslate (pfile, &token->val.str, &s))
new_file = (const char *)s.text;
-
+
new_sysp = 0;
flag = read_flag (pfile, 0);
if (flag == 1)
(*p->u.handler) (pfile);
if (pfile->cb.line_change)
(*pfile->cb.line_change) (pfile, pfile->cur_token, false);
-
+
}
else if (pfile->cb.def_pragma)
{
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
+ const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
new->need_line = true;
pfile->buffer = new;
+ _cpp_init_iconv_buffer (pfile, input);
+
return new;
}
/* In case of a missing #endif. */
pfile->state.skipping = 0;
+ _cpp_close_iconv_buffer (pfile);
+
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;
/* Holds the name of the target wide character set. */
const char *wide_charset;
+ /* Holds the name of the input character set. */
+ const char *input_charset;
+
/* True to warn about precompiled header files we couldn't use. */
bool warn_invalid_pch;
/* Mapping of file names for this directory for MS-DOS and related
platforms. A NULL-terminated array of (from, to) pairs. */
const char **name_map;
-
+
/* The C front end uses these to recognize duplicated
directories in the search path. */
ino_t ino;
{
struct ht_identifier ident;
unsigned int is_directive : 1;
- unsigned int directive_index : 7; /* If is_directive,
+ unsigned int directive_index : 7; /* If is_directive,
then index into directive table.
Otherwise, a NODE_OPERATOR. */
unsigned char rid_code; /* Rid code - for front ends. */