Daily bump.
[gcc.git] / gcc / pretty-print.c
1 /* Various declarations for language-independent pretty-print subroutines.
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Gabriel Dos Reis <gdr@integrable-solutions.net>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "intl.h"
26 #include "pretty-print.h"
27
28 #if HAVE_ICONV
29 #include <iconv.h>
30 #endif
31
/* Yield the base address of the text accumulated so far in PP's
   current output obstack, viewed as a read-only character pointer.  */
#define pp_formatted_text_data(PP) \
  ((const char *) obstack_base (pp_base (PP)->buffer->obstack))
35
/* Print into PP one integer fetched with va_arg from ARG.  PREC counts
   the 'l' length modifiers that were seen: 0 fetches a T, 1 a long T
   and 2 a long long T.  T is the base type specifier and F is the
   conversion letter, given as a string literal, that is pasted onto
   the length prefix to build the format handed to pp_scalar.  Any
   other PREC value prints nothing and fetches no argument.  */
#define pp_integer_with_precision(PP, ARG, PREC, T, F)                \
  do                                                                  \
    {                                                                 \
      switch (PREC)                                                   \
        {                                                             \
        case 0:                                                       \
          pp_scalar (PP, "%" F, va_arg (ARG, T));                     \
          break;                                                      \
                                                                      \
        case 1:                                                       \
          pp_scalar (PP, "%l" F, va_arg (ARG, long T));               \
          break;                                                      \
                                                                      \
        case 2:                                                       \
          pp_scalar (PP, "%" HOST_LONG_LONG_FORMAT F,                 \
                     va_arg (ARG, long long T));                      \
          break;                                                      \
                                                                      \
        default:                                                      \
          break;                                                      \
        }                                                             \
    }                                                                 \
  while (0)
59
60
61 /* Subroutine of pp_set_maximum_length. Set up PRETTY-PRINTER's
62 internal maximum characters per line. */
63 static void
64 pp_set_real_maximum_length (pretty_printer *pp)
65 {
66 /* If we're told not to wrap lines then do the obvious thing. In case
67 we'll emit prefix only once per message, it is appropriate
68 not to increase unnecessarily the line-length cut-off. */
69 if (!pp_is_wrapping_line (pp)
70 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_ONCE
71 || pp_prefixing_rule (pp) == DIAGNOSTICS_SHOW_PREFIX_NEVER)
72 pp->maximum_length = pp_line_cutoff (pp);
73 else
74 {
75 int prefix_length = pp->prefix ? strlen (pp->prefix) : 0;
76 /* If the prefix is ridiculously too long, output at least
77 32 characters. */
78 if (pp_line_cutoff (pp) - prefix_length < 32)
79 pp->maximum_length = pp_line_cutoff (pp) + 32;
80 else
81 pp->maximum_length = pp_line_cutoff (pp);
82 }
83 }
84
85 /* Clear PRETTY-PRINTER's output state. */
86 static inline void
87 pp_clear_state (pretty_printer *pp)
88 {
89 pp->emitted_prefix = false;
90 pp_indentation (pp) = 0;
91 }
92
93 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
94 void
95 pp_write_text_to_stream (pretty_printer *pp)
96 {
97 const char *text = pp_formatted_text (pp);
98 fputs (text, pp->buffer->stream);
99 pp_clear_output_area (pp);
100 }
101
102 /* Wrap a text delimited by START and END into PRETTY-PRINTER. */
103 static void
104 pp_wrap_text (pretty_printer *pp, const char *start, const char *end)
105 {
106 bool wrapping_line = pp_is_wrapping_line (pp);
107
108 while (start != end)
109 {
110 /* Dump anything bordered by whitespaces. */
111 {
112 const char *p = start;
113 while (p != end && !ISBLANK (*p) && *p != '\n')
114 ++p;
115 if (wrapping_line
116 && p - start >= pp_remaining_character_count_for_line (pp))
117 pp_newline (pp);
118 pp_append_text (pp, start, p);
119 start = p;
120 }
121
122 if (start != end && ISBLANK (*start))
123 {
124 pp_space (pp);
125 ++start;
126 }
127 if (start != end && *start == '\n')
128 {
129 pp_newline (pp);
130 ++start;
131 }
132 }
133 }
134
135 /* Same as pp_wrap_text but wrap text only when in line-wrapping mode. */
136 static inline void
137 pp_maybe_wrap_text (pretty_printer *pp, const char *start, const char *end)
138 {
139 if (pp_is_wrapping_line (pp))
140 pp_wrap_text (pp, start, end);
141 else
142 pp_append_text (pp, start, end);
143 }
144
145 /* Append to the output area of PRETTY-PRINTER a string specified by its
146 STARTing character and LENGTH. */
147 static inline void
148 pp_append_r (pretty_printer *pp, const char *start, int length)
149 {
150 obstack_grow (pp->buffer->obstack, start, length);
151 pp->buffer->line_length += length;
152 }
153
154 /* Insert enough spaces into the output area of PRETTY-PRINTER to bring
155 the column position to the current indentation level, assuming that a
156 newline has just been written to the buffer. */
157 void
158 pp_base_indent (pretty_printer *pp)
159 {
160 int n = pp_indentation (pp);
161 int i;
162
163 for (i = 0; i < n; ++i)
164 pp_space (pp);
165 }
166
167 /* The following format specifiers are recognized as being client independent:
168 %d, %i: (signed) integer in base ten.
169 %u: unsigned integer in base ten.
170 %o: unsigned integer in base eight.
171 %x: unsigned integer in base sixteen.
172 %ld, %li, %lo, %lu, %lx: long versions of the above.
173 %lld, %lli, %llo, %llu, %llx: long long versions.
174 %wd, %wi, %wo, %wu, %wx: HOST_WIDE_INT versions.
175 %c: character.
176 %s: string.
177 %p: pointer.
178 %m: strerror(text->err_no) - does not consume a value from args_ptr.
179 %%: '%'.
180 %<: opening quote.
181 %>: closing quote.
182 %': apostrophe (should only be used in untranslated messages;
183 translations should use appropriate punctuation directly).
184 %.*s: a substring the length of which is specified by an argument
185 integer.
186 %Ns: likewise, but length specified as constant in the format string.
187 Flag 'q': quote formatted text (must come immediately after '%').
188
189 Arguments can be used sequentially, or through %N$ resp. *N$
190 notation Nth argument after the format string. If %N$ / *N$
191 notation is used, it must be used for all arguments, except %m, %%,
192 %<, %> and %', which may not have a number, as they do not consume
193 an argument. When %M$.*N$s is used, M must be N + 1. (This may
194 also be written %M$.*s, provided N is not otherwise used.) The
195 format string must have conversion specifiers with argument numbers
196 1 up to highest argument; each argument may only be used once.
197 A format string can have at most 30 arguments. */
198
199 /* Formatting phases 1 and 2: render TEXT->format_spec plus
200 TEXT->args_ptr into a series of chunks in PP->buffer->args[].
201 Phase 3 is in pp_base_format_text. */
202
203 void
204 pp_base_format (pretty_printer *pp, text_info *text)
205 {
206 output_buffer *buffer = pp->buffer;
207 const char *p;
208 const char **args;
209 struct chunk_info *new_chunk_array;
210
211 unsigned int curarg = 0, chunk = 0, argno;
212 pp_wrapping_mode_t old_wrapping_mode;
213 bool any_unnumbered = false, any_numbered = false;
214 const char **formatters[PP_NL_ARGMAX];
215
216 /* Allocate a new chunk structure. */
217 new_chunk_array = XOBNEW (&buffer->chunk_obstack, struct chunk_info);
218 new_chunk_array->prev = buffer->cur_chunk_array;
219 buffer->cur_chunk_array = new_chunk_array;
220 args = new_chunk_array->args;
221
222 /* Formatting phase 1: split up TEXT->format_spec into chunks in
223 PP->buffer->args[]. Even-numbered chunks are to be output
224 verbatim, odd-numbered chunks are format specifiers.
225 %m, %%, %<, %>, and %' are replaced with the appropriate text at
226 this point. */
227
228 memset (formatters, 0, sizeof formatters);
229
230 for (p = text->format_spec; *p; )
231 {
232 while (*p != '\0' && *p != '%')
233 {
234 obstack_1grow (&buffer->chunk_obstack, *p);
235 p++;
236 }
237
238 if (*p == '\0')
239 break;
240
241 switch (*++p)
242 {
243 case '\0':
244 gcc_unreachable ();
245
246 case '%':
247 obstack_1grow (&buffer->chunk_obstack, '%');
248 p++;
249 continue;
250
251 case '<':
252 obstack_grow (&buffer->chunk_obstack,
253 open_quote, strlen (open_quote));
254 p++;
255 continue;
256
257 case '>':
258 case '\'':
259 obstack_grow (&buffer->chunk_obstack,
260 close_quote, strlen (close_quote));
261 p++;
262 continue;
263
264 case 'm':
265 {
266 const char *errstr = xstrerror (text->err_no);
267 obstack_grow (&buffer->chunk_obstack, errstr, strlen (errstr));
268 }
269 p++;
270 continue;
271
272 default:
273 /* Handled in phase 2. Terminate the plain chunk here. */
274 obstack_1grow (&buffer->chunk_obstack, '\0');
275 gcc_assert (chunk < PP_NL_ARGMAX * 2);
276 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
277 break;
278 }
279
280 if (ISDIGIT (*p))
281 {
282 char *end;
283 argno = strtoul (p, &end, 10) - 1;
284 p = end;
285 gcc_assert (*p == '$');
286 p++;
287
288 any_numbered = true;
289 gcc_assert (!any_unnumbered);
290 }
291 else
292 {
293 argno = curarg++;
294 any_unnumbered = true;
295 gcc_assert (!any_numbered);
296 }
297 gcc_assert (argno < PP_NL_ARGMAX);
298 gcc_assert (!formatters[argno]);
299 formatters[argno] = &args[chunk];
300 do
301 {
302 obstack_1grow (&buffer->chunk_obstack, *p);
303 p++;
304 }
305 while (strchr ("qwl+#", p[-1]));
306
307 if (p[-1] == '.')
308 {
309 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
310 (where M == N + 1). */
311 if (ISDIGIT (*p))
312 {
313 do
314 {
315 obstack_1grow (&buffer->chunk_obstack, *p);
316 p++;
317 }
318 while (ISDIGIT (p[-1]));
319 gcc_assert (p[-1] == 's');
320 }
321 else
322 {
323 gcc_assert (*p == '*');
324 obstack_1grow (&buffer->chunk_obstack, '*');
325 p++;
326
327 if (ISDIGIT (*p))
328 {
329 char *end;
330 unsigned int argno2 = strtoul (p, &end, 10) - 1;
331 p = end;
332 gcc_assert (argno2 == argno - 1);
333 gcc_assert (!any_unnumbered);
334 gcc_assert (*p == '$');
335
336 p++;
337 formatters[argno2] = formatters[argno];
338 }
339 else
340 {
341 gcc_assert (!any_numbered);
342 formatters[argno+1] = formatters[argno];
343 curarg++;
344 }
345 gcc_assert (*p == 's');
346 obstack_1grow (&buffer->chunk_obstack, 's');
347 p++;
348 }
349 }
350 if (*p == '\0')
351 break;
352
353 obstack_1grow (&buffer->chunk_obstack, '\0');
354 gcc_assert (chunk < PP_NL_ARGMAX * 2);
355 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
356 }
357
358 obstack_1grow (&buffer->chunk_obstack, '\0');
359 gcc_assert (chunk < PP_NL_ARGMAX * 2);
360 args[chunk++] = XOBFINISH (&buffer->chunk_obstack, const char *);
361 args[chunk] = 0;
362
363 /* Set output to the argument obstack, and switch line-wrapping and
364 prefixing off. */
365 buffer->obstack = &buffer->chunk_obstack;
366 old_wrapping_mode = pp_set_verbatim_wrapping (pp);
367
368 /* Second phase. Replace each formatter with the formatted text it
369 corresponds to. */
370
371 for (argno = 0; formatters[argno]; argno++)
372 {
373 int precision = 0;
374 bool wide = false;
375 bool plus = false;
376 bool hash = false;
377 bool quote = false;
378
379 /* We do not attempt to enforce any ordering on the modifier
380 characters. */
381
382 for (p = *formatters[argno];; p++)
383 {
384 switch (*p)
385 {
386 case 'q':
387 gcc_assert (!quote);
388 quote = true;
389 continue;
390
391 case '+':
392 gcc_assert (!plus);
393 plus = true;
394 continue;
395
396 case '#':
397 gcc_assert (!hash);
398 hash = true;
399 continue;
400
401 case 'w':
402 gcc_assert (!wide);
403 wide = true;
404 continue;
405
406 case 'l':
407 /* We don't support precision beyond that of "long long". */
408 gcc_assert (precision < 2);
409 precision++;
410 continue;
411 }
412 break;
413 }
414
415 gcc_assert (!wide || precision == 0);
416
417 if (quote)
418 pp_string (pp, open_quote);
419
420 switch (*p)
421 {
422 case 'c':
423 pp_character (pp, va_arg (*text->args_ptr, int));
424 break;
425
426 case 'd':
427 case 'i':
428 if (wide)
429 pp_wide_integer (pp, va_arg (*text->args_ptr, HOST_WIDE_INT));
430 else
431 pp_integer_with_precision
432 (pp, *text->args_ptr, precision, int, "d");
433 break;
434
435 case 'o':
436 if (wide)
437 pp_scalar (pp, "%" HOST_WIDE_INT_PRINT "o",
438 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
439 else
440 pp_integer_with_precision
441 (pp, *text->args_ptr, precision, unsigned, "o");
442 break;
443
444 case 's':
445 pp_string (pp, va_arg (*text->args_ptr, const char *));
446 break;
447
448 case 'p':
449 pp_pointer (pp, va_arg (*text->args_ptr, void *));
450 break;
451
452 case 'u':
453 if (wide)
454 pp_scalar (pp, HOST_WIDE_INT_PRINT_UNSIGNED,
455 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
456 else
457 pp_integer_with_precision
458 (pp, *text->args_ptr, precision, unsigned, "u");
459 break;
460
461 case 'x':
462 if (wide)
463 pp_scalar (pp, HOST_WIDE_INT_PRINT_HEX,
464 va_arg (*text->args_ptr, unsigned HOST_WIDE_INT));
465 else
466 pp_integer_with_precision
467 (pp, *text->args_ptr, precision, unsigned, "x");
468 break;
469
470 case '.':
471 {
472 int n;
473 const char *s;
474
475 /* We handle '%.Ns' and '%.*s' or '%M$.*N$s'
476 (where M == N + 1). The format string should be verified
477 already from the first phase. */
478 p++;
479 if (ISDIGIT (*p))
480 {
481 char *end;
482 n = strtoul (p, &end, 10);
483 p = end;
484 gcc_assert (*p == 's');
485 }
486 else
487 {
488 gcc_assert (*p == '*');
489 p++;
490 gcc_assert (*p == 's');
491 n = va_arg (*text->args_ptr, int);
492
493 /* This consumes a second entry in the formatters array. */
494 gcc_assert (formatters[argno] == formatters[argno+1]);
495 argno++;
496 }
497
498 s = va_arg (*text->args_ptr, const char *);
499 pp_append_text (pp, s, s + n);
500 }
501 break;
502
503 default:
504 {
505 bool ok;
506
507 gcc_assert (pp_format_decoder (pp));
508 ok = pp_format_decoder (pp) (pp, text, p,
509 precision, wide, plus, hash);
510 gcc_assert (ok);
511 }
512 }
513
514 if (quote)
515 pp_string (pp, close_quote);
516
517 obstack_1grow (&buffer->chunk_obstack, '\0');
518 *formatters[argno] = XOBFINISH (&buffer->chunk_obstack, const char *);
519 }
520
521 #ifdef ENABLE_CHECKING
522 for (; argno < PP_NL_ARGMAX; argno++)
523 gcc_assert (!formatters[argno]);
524 #endif
525
526 /* Revert to normal obstack and wrapping mode. */
527 buffer->obstack = &buffer->formatted_obstack;
528 buffer->line_length = 0;
529 pp_wrapping_mode (pp) = old_wrapping_mode;
530 pp_clear_state (pp);
531 }
532
533 /* Format of a message pointed to by TEXT. */
534 void
535 pp_base_output_formatted_text (pretty_printer *pp)
536 {
537 unsigned int chunk;
538 output_buffer *buffer = pp_buffer (pp);
539 struct chunk_info *chunk_array = buffer->cur_chunk_array;
540 const char **args = chunk_array->args;
541
542 gcc_assert (buffer->obstack == &buffer->formatted_obstack);
543 gcc_assert (buffer->line_length == 0);
544
545 /* This is a third phase, first 2 phases done in pp_base_format_args.
546 Now we actually print it. */
547 for (chunk = 0; args[chunk]; chunk++)
548 pp_string (pp, args[chunk]);
549
550 /* Deallocate the chunk structure and everything after it (i.e. the
551 associated series of formatted strings). */
552 buffer->cur_chunk_array = chunk_array->prev;
553 obstack_free (&buffer->chunk_obstack, chunk_array);
554 }
555
556 /* Helper subroutine of output_verbatim and verbatim. Do the appropriate
557 settings needed by BUFFER for a verbatim formatting. */
558 void
559 pp_base_format_verbatim (pretty_printer *pp, text_info *text)
560 {
561 /* Set verbatim mode. */
562 pp_wrapping_mode_t oldmode = pp_set_verbatim_wrapping (pp);
563
564 /* Do the actual formatting. */
565 pp_format (pp, text);
566 pp_output_formatted_text (pp);
567
568 /* Restore previous settings. */
569 pp_wrapping_mode (pp) = oldmode;
570 }
571
572 /* Flush the content of BUFFER onto the attached stream. */
573 void
574 pp_base_flush (pretty_printer *pp)
575 {
576 pp_write_text_to_stream (pp);
577 pp_clear_state (pp);
578 fputc ('\n', pp->buffer->stream);
579 fflush (pp->buffer->stream);
580 pp_needs_newline (pp) = false;
581 }
582
583 /* Sets the number of maximum characters per line PRETTY-PRINTER can
584 output in line-wrapping mode. A LENGTH value 0 suppresses
585 line-wrapping. */
586 void
587 pp_base_set_line_maximum_length (pretty_printer *pp, int length)
588 {
589 pp_line_cutoff (pp) = length;
590 pp_set_real_maximum_length (pp);
591 }
592
593 /* Clear PRETTY-PRINTER output area text info. */
594 void
595 pp_base_clear_output_area (pretty_printer *pp)
596 {
597 obstack_free (pp->buffer->obstack, obstack_base (pp->buffer->obstack));
598 pp->buffer->line_length = 0;
599 }
600
601 /* Set PREFIX for PRETTY-PRINTER. */
602 void
603 pp_base_set_prefix (pretty_printer *pp, const char *prefix)
604 {
605 pp->prefix = prefix;
606 pp_set_real_maximum_length (pp);
607 pp->emitted_prefix = false;
608 pp_indentation (pp) = 0;
609 }
610
611 /* Free PRETTY-PRINTER's prefix, a previously malloc()'d string. */
612 void
613 pp_base_destroy_prefix (pretty_printer *pp)
614 {
615 if (pp->prefix != NULL)
616 {
617 free (CONST_CAST (char *, pp->prefix));
618 pp->prefix = NULL;
619 }
620 }
621
622 /* Write out PRETTY-PRINTER's prefix. */
623 void
624 pp_base_emit_prefix (pretty_printer *pp)
625 {
626 if (pp->prefix != NULL)
627 {
628 switch (pp_prefixing_rule (pp))
629 {
630 default:
631 case DIAGNOSTICS_SHOW_PREFIX_NEVER:
632 break;
633
634 case DIAGNOSTICS_SHOW_PREFIX_ONCE:
635 if (pp->emitted_prefix)
636 {
637 pp_base_indent (pp);
638 break;
639 }
640 pp_indentation (pp) += 3;
641 /* Fall through. */
642
643 case DIAGNOSTICS_SHOW_PREFIX_EVERY_LINE:
644 {
645 int prefix_length = strlen (pp->prefix);
646 pp_append_r (pp, pp->prefix, prefix_length);
647 pp->emitted_prefix = true;
648 }
649 break;
650 }
651 }
652 }
653
654 /* Construct a PRETTY-PRINTER with PREFIX and of MAXIMUM_LENGTH
655 characters per line. */
656 void
657 pp_construct (pretty_printer *pp, const char *prefix, int maximum_length)
658 {
659 memset (pp, 0, sizeof (pretty_printer));
660 pp->buffer = XCNEW (output_buffer);
661 obstack_init (&pp->buffer->chunk_obstack);
662 obstack_init (&pp->buffer->formatted_obstack);
663 pp->buffer->obstack = &pp->buffer->formatted_obstack;
664 pp->buffer->stream = stderr;
665 pp_line_cutoff (pp) = maximum_length;
666 pp_prefixing_rule (pp) = DIAGNOSTICS_SHOW_PREFIX_ONCE;
667 pp_set_prefix (pp, prefix);
668 pp_translate_identifiers (pp) = true;
669 }
670
671 /* Append a string delimited by START and END to the output area of
672 PRETTY-PRINTER. No line wrapping is done. However, if beginning a
673 new line then emit PRETTY-PRINTER's prefix and skip any leading
674 whitespace if appropriate. The caller must ensure that it is
675 safe to do so. */
676 void
677 pp_base_append_text (pretty_printer *pp, const char *start, const char *end)
678 {
679 /* Emit prefix and skip whitespace if we're starting a new line. */
680 if (pp->buffer->line_length == 0)
681 {
682 pp_emit_prefix (pp);
683 if (pp_is_wrapping_line (pp))
684 while (start != end && *start == ' ')
685 ++start;
686 }
687 pp_append_r (pp, start, end - start);
688 }
689
690 /* Finishes constructing a NULL-terminated character string representing
691 the PRETTY-PRINTED text. */
692 const char *
693 pp_base_formatted_text (pretty_printer *pp)
694 {
695 obstack_1grow (pp->buffer->obstack, '\0');
696 return pp_formatted_text_data (pp);
697 }
698
699 /* Return a pointer to the last character emitted in PRETTY-PRINTER's
700 output area. A NULL pointer means no character available. */
701 const char *
702 pp_base_last_position_in_text (const pretty_printer *pp)
703 {
704 const char *p = NULL;
705 struct obstack *text = pp->buffer->obstack;
706
707 if (obstack_base (text) != obstack_next_free (text))
708 p = ((const char *) obstack_next_free (text)) - 1;
709 return p;
710 }
711
712 /* Return the amount of characters PRETTY-PRINTER can accept to
713 make a full line. Meaningful only in line-wrapping mode. */
714 int
715 pp_base_remaining_character_count_for_line (pretty_printer *pp)
716 {
717 return pp->maximum_length - pp->buffer->line_length;
718 }
719
720
721 /* Format a message into BUFFER a la printf. */
722 void
723 pp_printf (pretty_printer *pp, const char *msg, ...)
724 {
725 text_info text;
726 va_list ap;
727
728 va_start (ap, msg);
729 text.err_no = errno;
730 text.args_ptr = &ap;
731 text.format_spec = msg;
732 text.locus = NULL;
733 pp_format (pp, &text);
734 pp_output_formatted_text (pp);
735 va_end (ap);
736 }
737
738
739 /* Output MESSAGE verbatim into BUFFER. */
740 void
741 pp_verbatim (pretty_printer *pp, const char *msg, ...)
742 {
743 text_info text;
744 va_list ap;
745
746 va_start (ap, msg);
747 text.err_no = errno;
748 text.args_ptr = &ap;
749 text.format_spec = msg;
750 text.locus = NULL;
751 pp_format_verbatim (pp, &text);
752 va_end (ap);
753 }
754
755
756
757 /* Have PRETTY-PRINTER start a new line. */
758 void
759 pp_base_newline (pretty_printer *pp)
760 {
761 obstack_1grow (pp->buffer->obstack, '\n');
762 pp->buffer->line_length = 0;
763 }
764
765 /* Have PRETTY-PRINTER add a CHARACTER. */
766 void
767 pp_base_character (pretty_printer *pp, int c)
768 {
769 if (pp_is_wrapping_line (pp)
770 && pp_remaining_character_count_for_line (pp) <= 0)
771 {
772 pp_newline (pp);
773 if (ISSPACE (c))
774 return;
775 }
776 obstack_1grow (pp->buffer->obstack, c);
777 ++pp->buffer->line_length;
778 }
779
780 /* Append a STRING to the output area of PRETTY-PRINTER; the STRING may
781 be line-wrapped if in appropriate mode. */
782 void
783 pp_base_string (pretty_printer *pp, const char *str)
784 {
785 pp_maybe_wrap_text (pp, str, str + (str ? strlen (str) : 0));
786 }
787
788 /* Maybe print out a whitespace if needed. */
789
790 void
791 pp_base_maybe_space (pretty_printer *pp)
792 {
793 if (pp_base (pp)->padding != pp_none)
794 {
795 pp_space (pp);
796 pp_base (pp)->padding = pp_none;
797 }
798 }
799 \f
/* The string starting at P has LEN (at least 1) bytes left; if they
   start with a valid UTF-8 sequence, return the length of that
   sequence and set *VALUE to the value of that sequence, and
   otherwise return 0 and set *VALUE to (unsigned int) -1.

   A sequence is rejected when it is longer than LEN, when its lead
   byte announces fewer than 2 or more than 6 bytes, when a
   continuation byte is not of the form 10xxxxxx, when the encoding is
   overlong for the value, or when the value is a UTF-16 surrogate
   (0xD800-0xDFFF).  Calling with LEN equal to 0 aborts.  */

static int
decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
{
  unsigned int t;

  /* Validate LEN before touching *P, so that an empty range is never
     read.  */
  if (len == 0)
    abort ();

  t = *p;
  if (t & 0x80)
    {
      size_t utf8_len = 0;
      unsigned int ch;
      size_t i;

      /* The number of leading 1 bits in the first byte is the length
         of the sequence.  */
      for (t = *p; t & 0x80; t <<= 1)
        utf8_len++;

      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
        {
          *value = (unsigned int) -1;
          return 0;
        }

      /* Payload bits of the lead byte.  */
      ch = *p & ((1 << (7 - utf8_len)) - 1);
      for (i = 1; i < utf8_len; i++)
        {
          unsigned int u = p[i];
          if ((u & 0xC0) != 0x80)
            {
              *value = (unsigned int) -1;
              return 0;
            }
          ch = (ch << 6) | (u & 0x3F);
        }

      /* Reject overlong encodings and surrogates.  */
      if (   (ch <=      0x7F && utf8_len > 1)
          || (ch <=     0x7FF && utf8_len > 2)
          || (ch <=    0xFFFF && utf8_len > 3)
          || (ch <=  0x1FFFFF && utf8_len > 4)
          || (ch <= 0x3FFFFFF && utf8_len > 5)
          || (ch >= 0xD800 && ch <= 0xDFFF))
        {
          *value = (unsigned int) -1;
          return 0;
        }
      *value = ch;
      return utf8_len;
    }
  else
    {
      /* Plain ASCII byte.  */
      *value = t;
      return 1;
    }
}
855
856 /* Allocator for identifier_to_locale and corresponding function to
857 free memory. */
858
859 void *(*identifier_to_locale_alloc) (size_t) = xmalloc;
860 void (*identifier_to_locale_free) (void *) = free;
861
862 /* Given IDENT, an identifier in the internal encoding, return a
863 version of IDENT suitable for diagnostics in the locale character
864 set: either IDENT itself, or a string, allocated using
865 identifier_to_locale_alloc, converted to the locale character set
866 and using escape sequences if not representable in the locale
867 character set or containing control characters or invalid byte
868 sequences. Existing backslashes in IDENT are not doubled, so the
869 result may not uniquely specify the contents of an arbitrary byte
870 sequence identifier. */
871
872 const char *
873 identifier_to_locale (const char *ident)
874 {
875 const unsigned char *uid = (const unsigned char *) ident;
876 size_t idlen = strlen (ident);
877 bool valid_printable_utf8 = true;
878 bool all_ascii = true;
879 size_t i;
880
881 for (i = 0; i < idlen;)
882 {
883 unsigned int c;
884 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
885 if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
886 {
887 valid_printable_utf8 = false;
888 break;
889 }
890 if (utf8_len > 1)
891 all_ascii = false;
892 i += utf8_len;
893 }
894
895 /* If IDENT contains invalid UTF-8 sequences (which may occur with
896 attributes putting arbitrary byte sequences in identifiers), or
897 control characters, we use octal escape sequences for all bytes
898 outside printable ASCII. */
899 if (!valid_printable_utf8)
900 {
901 char *ret = (char *) identifier_to_locale_alloc (4 * idlen + 1);
902 char *p = ret;
903 for (i = 0; i < idlen; i++)
904 {
905 if (uid[i] > 0x1F && uid[i] < 0x7F)
906 *p++ = uid[i];
907 else
908 {
909 sprintf (p, "\\%03o", uid[i]);
910 p += 4;
911 }
912 }
913 *p = 0;
914 return ret;
915 }
916
917 /* Otherwise, if it is valid printable ASCII, or printable UTF-8
918 with the locale character set being UTF-8, IDENT is used. */
919 if (all_ascii || locale_utf8)
920 return ident;
921
922 /* Otherwise IDENT is converted to the locale character set if
923 possible. */
924 #if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
925 if (locale_encoding != NULL)
926 {
927 iconv_t cd = iconv_open (locale_encoding, "UTF-8");
928 bool conversion_ok = true;
929 char *ret = NULL;
930 if (cd != (iconv_t) -1)
931 {
932 size_t ret_alloc = 4 * idlen + 1;
933 for (;;)
934 {
935 /* Repeat the whole conversion process as needed with
936 larger buffers so non-reversible transformations can
937 always be detected. */
938 ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
939 char *outbuf;
940 size_t inbytesleft = idlen;
941 size_t outbytesleft = ret_alloc - 1;
942 size_t iconv_ret;
943
944 ret = (char *) identifier_to_locale_alloc (ret_alloc);
945 outbuf = ret;
946
947 if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
948 {
949 conversion_ok = false;
950 break;
951 }
952
953 iconv_ret = iconv (cd, &inbuf, &inbytesleft,
954 &outbuf, &outbytesleft);
955 if (iconv_ret == (size_t) -1 || inbytesleft != 0)
956 {
957 if (errno == E2BIG)
958 {
959 ret_alloc *= 2;
960 identifier_to_locale_free (ret);
961 ret = NULL;
962 continue;
963 }
964 else
965 {
966 conversion_ok = false;
967 break;
968 }
969 }
970 else if (iconv_ret != 0)
971 {
972 conversion_ok = false;
973 break;
974 }
975 /* Return to initial shift state. */
976 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
977 {
978 if (errno == E2BIG)
979 {
980 ret_alloc *= 2;
981 identifier_to_locale_free (ret);
982 ret = NULL;
983 continue;
984 }
985 else
986 {
987 conversion_ok = false;
988 break;
989 }
990 }
991 *outbuf = 0;
992 break;
993 }
994 iconv_close (cd);
995 if (conversion_ok)
996 return ret;
997 }
998 }
999 #endif
1000
1001 /* Otherwise, convert non-ASCII characters in IDENT to UCNs. */
1002 {
1003 char *ret = (char *) identifier_to_locale_alloc (10 * idlen + 1);
1004 char *p = ret;
1005 for (i = 0; i < idlen;)
1006 {
1007 unsigned int c;
1008 size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
1009 if (utf8_len == 1)
1010 *p++ = uid[i];
1011 else
1012 {
1013 sprintf (p, "\\U%08x", c);
1014 p += 10;
1015 }
1016 i += utf8_len;
1017 }
1018 *p = 0;
1019 return ret;
1020 }
1021 }