Fix m32r-elf sim, default hardware to off.
[binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_obstack.h"
24 #include "gdb_wait.h"
25 #include "charset-list.h"
26 #include "vec.h"
27 #include "environ.h"
28 #include "arch-utils.h"
29 #include "gdb_vecs.h"
30 #include <ctype.h>
31
32 #ifdef USE_WIN32API
33 #include <windows.h>
34 #endif
35 \f
36 /* How GDB's character set support works
37
38 GDB has three global settings:
39
40 - The `current host character set' is the character set GDB should
41 use in talking to the user, and which (hopefully) the user's
42 terminal knows how to display properly. Most users should not
43 change this.
44
45 - The `current target character set' is the character set the
46 program being debugged uses.
47
48 - The `current target wide character set' is the wide character set
49 the program being debugged uses, that is, the encoding used for
50 wchar_t.
51
52 There are commands to set each of these, and mechanisms for
53 choosing reasonable default values. GDB has a global list of
54 character sets that it can use as its host or target character
55 sets.
56
57 The header file `charset.h' declares various functions that
58 different pieces of GDB need to perform tasks like:
59
60 - printing target strings and characters to the user's terminal
61 (mostly target->host conversions),
62
63 - building target-appropriate representations of strings and
64 characters the user enters in expressions (mostly host->target
65 conversions),
66
67 and so on.
68
69 To avoid excessive code duplication and maintenance efforts,
70 GDB simply requires a capable iconv function. Users on platforms
71 without a suitable iconv can use the GNU iconv library. */
72
73 \f
74 #ifdef PHONY_ICONV
75
76 /* Provide a phony iconv that does as little as possible. Also,
77 arrange for there to be a single available character set. */
78
79 #undef GDB_DEFAULT_HOST_CHARSET
80 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
81 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
82 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
83 #undef DEFAULT_CHARSET_NAMES
84 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
85
86 #undef iconv_t
87 #define iconv_t int
88 #undef iconv_open
89 #define iconv_open phony_iconv_open
90 #undef iconv
91 #define iconv phony_iconv
92 #undef iconv_close
93 #define iconv_close phony_iconv_close
94
95 #undef ICONV_CONST
96 #define ICONV_CONST const
97
98 /* Some systems don't have EILSEQ, so we define it here, but not as
99 EINVAL, because callers of `iconv' want to distinguish EINVAL and
100 EILSEQ. This is what iconv.h from libiconv does as well. Note
101 that wchar.h may also define EILSEQ, so this needs to be after we
102 include wchar.h, which happens in defs.h through gdb_wchar.h. */
103 #ifndef EILSEQ
104 #define EILSEQ ENOENT
105 #endif
106
107 static iconv_t
108 phony_iconv_open (const char *to, const char *from)
109 {
110 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
111 We allow conversions to wchar_t and the host charset. */
112 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
113 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
114 return -1;
115 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
116 return -1;
117
118 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
119 used as a flag in calls to iconv. */
120 return !strcmp (from, "UTF-32BE");
121 }
122
123 static int
124 phony_iconv_close (iconv_t arg)
125 {
126 return 0;
127 }
128
/* Phony replacement for iconv.  UTF_FLAG is the value returned by
   phony_iconv_open: non-zero means the input is UTF-32BE and every
   4-byte unit is narrowed to a single output byte; zero means input
   bytes are copied to the output unchanged.

   *INBUF, *INBYTESLEFT, *OUTBUF and *OUTBYTESLEFT are advanced past
   whatever was consumed and produced.  Returns 0 on success.  On
   failure returns (size_t) -1 and sets errno to:
     EILSEQ - a UTF-32BE unit has a value of 256 or more;
     E2BIG  - the output buffer is full but input remains;
     EINVAL - the UTF-32BE input ends with a partial 4-byte unit.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit unit.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }

          /* Never store past the end of the caller's buffer; nothing
             has been consumed for this unit yet, so the caller can
             retry with more room.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole 4-byte unit has been consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Anything left over here is shorter than one unit.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
189
190 #endif
191
192
193 \f
194 /* The global lists of character sets and translations. */
195
196
197 #ifndef GDB_DEFAULT_TARGET_CHARSET
198 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
199 #endif
200
201 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
202 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
203 #endif
204
205 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
206 static const char *host_charset_name = "auto";
207 static void
208 show_host_charset_name (struct ui_file *file, int from_tty,
209 struct cmd_list_element *c,
210 const char *value)
211 {
212 if (!strcmp (value, "auto"))
213 fprintf_filtered (file,
214 _("The host character set is \"auto; currently %s\".\n"),
215 auto_host_charset_name);
216 else
217 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
218 }
219
/* The user-visible target charset setting.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  For "auto", the
   charset the current architecture selects is printed as well.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
234
/* The user-visible target wide charset setting.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  For "auto",
   the wide charset the current architecture selects is printed as
   well.  */

static void
show_target_wide_charset_name (struct ui_file *file,
                               int from_tty,
                               struct cmd_list_element *c,
                               const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
251
252 static const char *default_charset_names[] =
253 {
254 DEFAULT_CHARSET_NAMES
255 0
256 };
257
258 static const char **charset_enum;
259
260 \f
261 /* If the target wide character set has big- or little-endian
262 variants, these are the corresponding names. */
263 static const char *target_wide_charset_be_name;
264 static const char *target_wide_charset_le_name;
265
266 /* The architecture for which the BE- and LE-names are valid. */
267 static struct gdbarch *be_le_arch;
268
269 /* A helper function which sets the target wide big- and little-endian
270 character set names, if possible. */
271
272 static void
273 set_be_le_names (struct gdbarch *gdbarch)
274 {
275 int i, len;
276 const char *target_wide;
277
278 if (be_le_arch == gdbarch)
279 return;
280 be_le_arch = gdbarch;
281
282 target_wide_charset_le_name = NULL;
283 target_wide_charset_be_name = NULL;
284
285 target_wide = target_wide_charset_name;
286 if (!strcmp (target_wide, "auto"))
287 target_wide = gdbarch_auto_wide_charset (gdbarch);
288
289 len = strlen (target_wide);
290 for (i = 0; charset_enum[i]; ++i)
291 {
292 if (strncmp (target_wide, charset_enum[i], len))
293 continue;
294 if ((charset_enum[i][len] == 'B'
295 || charset_enum[i][len] == 'L')
296 && charset_enum[i][len + 1] == 'E'
297 && charset_enum[i][len + 2] == '\0')
298 {
299 if (charset_enum[i][len] == 'B')
300 target_wide_charset_be_name = charset_enum[i];
301 else
302 target_wide_charset_le_name = charset_enum[i];
303 }
304 }
305 }
306
307 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
308 target-wide-charset', 'set charset' sfunc's. */
309
310 static void
311 validate (struct gdbarch *gdbarch)
312 {
313 iconv_t desc;
314 const char *host_cset = host_charset ();
315 const char *target_cset = target_charset (gdbarch);
316 const char *target_wide_cset = target_wide_charset_name;
317
318 if (!strcmp (target_wide_cset, "auto"))
319 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
320
321 desc = iconv_open (target_wide_cset, host_cset);
322 if (desc == (iconv_t) -1)
323 error (_("Cannot convert between character sets `%s' and `%s'"),
324 target_wide_cset, host_cset);
325 iconv_close (desc);
326
327 desc = iconv_open (target_cset, host_cset);
328 if (desc == (iconv_t) -1)
329 error (_("Cannot convert between character sets `%s' and `%s'"),
330 target_cset, host_cset);
331 iconv_close (desc);
332
333 /* Clear the cache. */
334 be_le_arch = NULL;
335 }
336
337 /* This is the sfunc for the 'set charset' command. */
338 static void
339 set_charset_sfunc (char *charset, int from_tty,
340 struct cmd_list_element *c)
341 {
342 /* CAREFUL: set the target charset here as well. */
343 target_charset_name = host_charset_name;
344 validate (get_current_arch ());
345 }
346
/* The sfunc for the 'set host-charset' command.  It exists only to
   give validate the signature a set command requires.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
355
/* The sfunc for the 'set target-charset' command; validates the new
   setting against the current architecture.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
363
/* The sfunc for the 'set target-wide-charset' command; validates the
   new setting against the current architecture.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
371
372 /* sfunc for the 'show charset' command. */
373 static void
374 show_charset (struct ui_file *file, int from_tty,
375 struct cmd_list_element *c,
376 const char *name)
377 {
378 show_host_charset_name (file, from_tty, c, host_charset_name);
379 show_target_charset_name (file, from_tty, c, target_charset_name);
380 show_target_wide_charset_name (file, from_tty, c,
381 target_wide_charset_name);
382 }
383
384 \f
385 /* Accessor functions. */
386
387 const char *
388 host_charset (void)
389 {
390 if (!strcmp (host_charset_name, "auto"))
391 return auto_host_charset_name;
392 return host_charset_name;
393 }
394
395 const char *
396 target_charset (struct gdbarch *gdbarch)
397 {
398 if (!strcmp (target_charset_name, "auto"))
399 return gdbarch_auto_charset (gdbarch);
400 return target_charset_name;
401 }
402
403 const char *
404 target_wide_charset (struct gdbarch *gdbarch)
405 {
406 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
407
408 set_be_le_names (gdbarch);
409 if (byte_order == BFD_ENDIAN_BIG)
410 {
411 if (target_wide_charset_be_name)
412 return target_wide_charset_be_name;
413 }
414 else
415 {
416 if (target_wide_charset_le_name)
417 return target_wide_charset_le_name;
418 }
419
420 if (!strcmp (target_wide_charset_name, "auto"))
421 return gdbarch_auto_wide_charset (gdbarch);
422
423 return target_wide_charset_name;
424 }
425
426 \f
427 /* Host character set management. For the time being, we assume that
428 the host character set is some superset of ASCII. */
429
/* Map the host character C to its control character: `?' becomes
   0177, anything else is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
437
/* Convert a host character, C, to its hex value (0 through 15).  C
   must already have been validated using isxdigit; any other
   character trips the assertion below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for values representable
     as unsigned char (or EOF), and plain char may be signed.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
451
452 \f
453 /* Public character management functions. */
454
455 /* A cleanup function which is run to close an iconv descriptor. */
456
457 static void
458 cleanup_iconv (void *p)
459 {
460 iconv_t *descp = p;
461 iconv_close (*descp);
462 }
463
464 void
465 convert_between_encodings (const char *from, const char *to,
466 const gdb_byte *bytes, unsigned int num_bytes,
467 int width, struct obstack *output,
468 enum transliterations translit)
469 {
470 iconv_t desc;
471 struct cleanup *cleanups;
472 size_t inleft;
473 ICONV_CONST char *inp;
474 unsigned int space_request;
475
476 /* Often, the host and target charsets will be the same. */
477 if (!strcmp (from, to))
478 {
479 obstack_grow (output, bytes, num_bytes);
480 return;
481 }
482
483 desc = iconv_open (to, from);
484 if (desc == (iconv_t) -1)
485 perror_with_name (_("Converting character sets"));
486 cleanups = make_cleanup (cleanup_iconv, &desc);
487
488 inleft = num_bytes;
489 inp = (ICONV_CONST char *) bytes;
490
491 space_request = num_bytes;
492
493 while (inleft > 0)
494 {
495 char *outp;
496 size_t outleft, r;
497 int old_size;
498
499 old_size = obstack_object_size (output);
500 obstack_blank (output, space_request);
501
502 outp = (char *) obstack_base (output) + old_size;
503 outleft = space_request;
504
505 r = iconv (desc, &inp, &inleft, &outp, &outleft);
506
507 /* Now make sure that the object on the obstack only includes
508 bytes we have converted. */
509 obstack_blank (output, - (int) outleft);
510
511 if (r == (size_t) -1)
512 {
513 switch (errno)
514 {
515 case EILSEQ:
516 {
517 int i;
518
519 /* Invalid input sequence. */
520 if (translit == translit_none)
521 error (_("Could not convert character "
522 "to `%s' character set"), to);
523
524 /* We emit escape sequence for the bytes, skip them,
525 and try again. */
526 for (i = 0; i < width; ++i)
527 {
528 char octal[5];
529
530 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
531 obstack_grow_str (output, octal);
532
533 ++inp;
534 --inleft;
535 }
536 }
537 break;
538
539 case E2BIG:
540 /* We ran out of space in the output buffer. Make it
541 bigger next time around. */
542 space_request *= 2;
543 break;
544
545 case EINVAL:
546 /* Incomplete input sequence. FIXME: ought to report this
547 to the caller somehow. */
548 inleft = 0;
549 break;
550
551 default:
552 perror_with_name (_("Internal error while "
553 "converting character sets"));
554 }
555 }
556 }
557
558 do_cleanups (cleanups);
559 }
560
561 \f
562
563 /* An iterator that returns host wchar_t's from a target string. */
564 struct wchar_iterator
565 {
566 /* The underlying iconv descriptor. */
567 iconv_t desc;
568
569 /* The input string. This is updated as convert characters. */
570 const gdb_byte *input;
571 /* The number of bytes remaining in the input. */
572 size_t bytes;
573
574 /* The width of an input character. */
575 size_t width;
576
577 /* The output buffer and its size. */
578 gdb_wchar_t *out;
579 size_t out_size;
580 };
581
582 /* Create a new iterator. */
583 struct wchar_iterator *
584 make_wchar_iterator (const gdb_byte *input, size_t bytes,
585 const char *charset, size_t width)
586 {
587 struct wchar_iterator *result;
588 iconv_t desc;
589
590 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
591 if (desc == (iconv_t) -1)
592 perror_with_name (_("Converting character sets"));
593
594 result = XNEW (struct wchar_iterator);
595 result->desc = desc;
596 result->input = input;
597 result->bytes = bytes;
598 result->width = width;
599
600 result->out = XNEW (gdb_wchar_t);
601 result->out_size = 1;
602
603 return result;
604 }
605
606 static void
607 do_cleanup_iterator (void *p)
608 {
609 struct wchar_iterator *iter = p;
610
611 iconv_close (iter->desc);
612 xfree (iter->out);
613 xfree (iter);
614 }
615
616 struct cleanup *
617 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
618 {
619 return make_cleanup (do_cleanup_iterator, iter);
620 }
621
622 int
623 wchar_iterate (struct wchar_iterator *iter,
624 enum wchar_iterate_result *out_result,
625 gdb_wchar_t **out_chars,
626 const gdb_byte **ptr,
627 size_t *len)
628 {
629 size_t out_request;
630
631 /* Try to convert some characters. At first we try to convert just
632 a single character. The reason for this is that iconv does not
633 necessarily update its outgoing arguments when it encounters an
634 invalid input sequence -- but we want to reliably report this to
635 our caller so it can emit an escape sequence. */
636 out_request = 1;
637 while (iter->bytes > 0)
638 {
639 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
640 char *outptr = (char *) &iter->out[0];
641 const gdb_byte *orig_inptr = iter->input;
642 size_t orig_in = iter->bytes;
643 size_t out_avail = out_request * sizeof (gdb_wchar_t);
644 size_t num;
645 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
646
647 iter->input = (gdb_byte *) inptr;
648
649 if (r == (size_t) -1)
650 {
651 switch (errno)
652 {
653 case EILSEQ:
654 /* Invalid input sequence. We still might have
655 converted a character; if so, return it. */
656 if (out_avail < out_request * sizeof (gdb_wchar_t))
657 break;
658
659 /* Otherwise skip the first invalid character, and let
660 the caller know about it. */
661 *out_result = wchar_iterate_invalid;
662 *ptr = iter->input;
663 *len = iter->width;
664 iter->input += iter->width;
665 iter->bytes -= iter->width;
666 return 0;
667
668 case E2BIG:
669 /* We ran out of space. We still might have converted a
670 character; if so, return it. Otherwise, grow the
671 buffer and try again. */
672 if (out_avail < out_request * sizeof (gdb_wchar_t))
673 break;
674
675 ++out_request;
676 if (out_request > iter->out_size)
677 {
678 iter->out_size = out_request;
679 iter->out = xrealloc (iter->out,
680 out_request * sizeof (gdb_wchar_t));
681 }
682 continue;
683
684 case EINVAL:
685 /* Incomplete input sequence. Let the caller know, and
686 arrange for future calls to see EOF. */
687 *out_result = wchar_iterate_incomplete;
688 *ptr = iter->input;
689 *len = iter->bytes;
690 iter->bytes = 0;
691 return 0;
692
693 default:
694 perror_with_name (_("Internal error while "
695 "converting character sets"));
696 }
697 }
698
699 /* We converted something. */
700 num = out_request - out_avail / sizeof (gdb_wchar_t);
701 *out_result = wchar_iterate_ok;
702 *out_chars = iter->out;
703 *ptr = orig_inptr;
704 *len = orig_in - iter->bytes;
705 return num;
706 }
707
708 /* Really done. */
709 *out_result = wchar_iterate_eof;
710 return -1;
711 }
712
713 \f
714 /* The charset.c module initialization function. */
715
716 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
717
718 static VEC (char_ptr) *charsets;
719
720 #ifdef PHONY_ICONV
721
722 static void
723 find_charset_names (void)
724 {
725 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
726 VEC_safe_push (char_ptr, charsets, NULL);
727 }
728
729 #else /* PHONY_ICONV */
730
731 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
732 provides different symbols in the static and dynamic libraries.
733 So, configure may see libiconvlist but not iconvlist. But, calling
734 iconvlist is the right thing to do and will work. Hence we do a
735 check here but unconditionally call iconvlist below. */
736 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
737
738 /* A helper function that adds some character sets to the vector of
739 all character sets. This is a callback function for iconvlist. */
740
741 static int
742 add_one (unsigned int count, const char *const *names, void *data)
743 {
744 unsigned int i;
745
746 for (i = 0; i < count; ++i)
747 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
748
749 return 0;
750 }
751
752 static void
753 find_charset_names (void)
754 {
755 iconvlist (add_one, NULL);
756 VEC_safe_push (char_ptr, charsets, NULL);
757 }
758
759 #else
760
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) print a human readable
   introduction even when stdout is not a tty, while newer versions
   leave it out in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
789
790 static void
791 find_charset_names (void)
792 {
793 struct pex_obj *child;
794 char *args[3];
795 int err, status;
796 int fail = 1;
797 int flags;
798 struct gdb_environ *iconv_env;
799 char *iconv_program;
800
801 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
802 not a tty. We need to recognize it and ignore it. This text is
803 subject to translation, so force LANGUAGE=C. */
804 iconv_env = make_environ ();
805 init_environ (iconv_env);
806 set_in_environ (iconv_env, "LANGUAGE", "C");
807 set_in_environ (iconv_env, "LC_ALL", "C");
808
809 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
810
811 #ifdef ICONV_BIN
812 {
813 char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
814 ICONV_BIN_RELOCATABLE);
815 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
816 xfree (iconv_dir);
817 }
818 #else
819 iconv_program = xstrdup ("iconv");
820 #endif
821 args[0] = iconv_program;
822 args[1] = "-l";
823 args[2] = NULL;
824 flags = PEX_STDERR_TO_STDOUT;
825 #ifndef ICONV_BIN
826 flags |= PEX_SEARCH;
827 #endif
828 /* Note that we simply ignore errors here. */
829 if (!pex_run_in_environment (child, flags,
830 args[0], args, environ_vector (iconv_env),
831 NULL, NULL, &err))
832 {
833 FILE *in = pex_read_output (child, 0);
834
835 /* POSIX says that iconv -l uses an unspecified format. We
836 parse the glibc and libiconv formats; feel free to add others
837 as needed. */
838
839 while (in != NULL && !feof (in))
840 {
841 /* The size of buf is chosen arbitrarily. */
842 char buf[1024];
843 char *start, *r;
844 int len;
845
846 r = fgets (buf, sizeof (buf), in);
847 if (!r)
848 break;
849 len = strlen (r);
850 if (len <= 3)
851 continue;
852 if (ignore_line_p (r))
853 continue;
854
855 /* Strip off the newline. */
856 --len;
857 /* Strip off one or two '/'s. glibc will print lines like
858 "8859_7//", but also "10646-1:1993/UCS4/". */
859 if (buf[len - 1] == '/')
860 --len;
861 if (buf[len - 1] == '/')
862 --len;
863 buf[len] = '\0';
864
865 /* libiconv will print multiple entries per line, separated
866 by spaces. Older iconvs will print multiple entries per
867 line, indented by two spaces, and separated by ", "
868 (i.e. the human readable form). */
869 start = buf;
870 while (1)
871 {
872 int keep_going;
873 char *p;
874
875 /* Skip leading blanks. */
876 for (p = start; *p && *p == ' '; ++p)
877 ;
878 start = p;
879 /* Find the next space, comma, or end-of-line. */
880 for ( ; *p && *p != ' ' && *p != ','; ++p)
881 ;
882 /* Ignore an empty result. */
883 if (p == start)
884 break;
885 keep_going = *p;
886 *p = '\0';
887 VEC_safe_push (char_ptr, charsets, xstrdup (start));
888 if (!keep_going)
889 break;
890 /* Skip any extra spaces. */
891 for (start = p + 1; *start && *start == ' '; ++start)
892 ;
893 }
894 }
895
896 if (pex_get_status (child, 1, &status)
897 && WIFEXITED (status) && !WEXITSTATUS (status))
898 fail = 0;
899
900 }
901
902 xfree (iconv_program);
903 pex_free (child);
904 free_environ (iconv_env);
905
906 if (fail)
907 {
908 /* Some error occurred, so drop the vector. */
909 free_char_ptr_vec (charsets);
910 charsets = NULL;
911 }
912 else
913 VEC_safe_push (char_ptr, charsets, NULL);
914 }
915
916 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
917 #endif /* PHONY_ICONV */
918
919 /* The "auto" target charset used by default_auto_charset. */
920 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
921
922 const char *
923 default_auto_charset (void)
924 {
925 return auto_target_charset_name;
926 }
927
928 const char *
929 default_auto_wide_charset (void)
930 {
931 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
932 }
933
934
935 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
936 /* Macro used for UTF or UCS endianness suffix. */
937 #if WORDS_BIGENDIAN
938 #define ENDIAN_SUFFIX "BE"
939 #else
940 #define ENDIAN_SUFFIX "LE"
941 #endif
942
943 /* The code below serves to generate a compile time error if
944 gdb_wchar_t type is not of size 2 nor 4, despite the fact that
945 macro __STDC_ISO_10646__ is defined.
946 This is better than a gdb_assert call, because GDB cannot handle
947 strings correctly if this size is different. */
948
949 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
950 || sizeof (gdb_wchar_t) == 4)
951 ? 1 : -1];
952
953 /* intermediate_encoding returns the charset used internally by
954 GDB to convert between target and host encodings. As the test above
955 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
956 UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
957 otherwise an error is generated. */
958
959 const char *
960 intermediate_encoding (void)
961 {
962 iconv_t desc;
963 static const char *stored_result = NULL;
964 char *result;
965
966 if (stored_result)
967 return stored_result;
968 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
969 ENDIAN_SUFFIX);
970 /* Check that the name is supported by iconv_open. */
971 desc = iconv_open (result, host_charset ());
972 if (desc != (iconv_t) -1)
973 {
974 iconv_close (desc);
975 stored_result = result;
976 return result;
977 }
978 /* Not valid, free the allocated memory. */
979 xfree (result);
980 /* Second try, with UCS-2 type. */
981 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
982 ENDIAN_SUFFIX);
983 /* Check that the name is supported by iconv_open. */
984 desc = iconv_open (result, host_charset ());
985 if (desc != (iconv_t) -1)
986 {
987 iconv_close (desc);
988 stored_result = result;
989 return result;
990 }
991 /* Not valid, free the allocated memory. */
992 xfree (result);
993 /* No valid charset found, generate error here. */
994 error (_("Unable to find a vaild charset for string conversions"));
995 }
996
997 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
998
999 void
1000 _initialize_charset (void)
1001 {
1002 /* The first element is always "auto". */
1003 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
1004 find_charset_names ();
1005
1006 if (VEC_length (char_ptr, charsets) > 1)
1007 charset_enum = (const char **) VEC_address (char_ptr, charsets);
1008 else
1009 charset_enum = default_charset_names;
1010
1011 #ifndef PHONY_ICONV
1012 #ifdef HAVE_LANGINFO_CODESET
1013 /* The result of nl_langinfo may be overwritten later. This may
1014 leak a little memory, if the user later changes the host charset,
1015 but that doesn't matter much. */
1016 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1017 /* Solaris will return `646' here -- but the Solaris iconv then does
1018 not accept this. Darwin (and maybe FreeBSD) may return "" here,
1019 which GNU libiconv doesn't like (infinite loop). */
1020 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1021 auto_host_charset_name = "ASCII";
1022 auto_target_charset_name = auto_host_charset_name;
1023 #elif defined (USE_WIN32API)
1024 {
1025 /* "CP" + x<=5 digits + paranoia. */
1026 static char w32_host_default_charset[16];
1027
1028 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1029 "CP%d", GetACP());
1030 auto_host_charset_name = w32_host_default_charset;
1031 auto_target_charset_name = auto_host_charset_name;
1032 }
1033 #endif
1034 #endif
1035
1036 add_setshow_enum_cmd ("charset", class_support,
1037 charset_enum, &host_charset_name, _("\
1038 Set the host and target character sets."), _("\
1039 Show the host and target character sets."), _("\
1040 The `host character set' is the one used by the system GDB is running on.\n\
1041 The `target character set' is the one used by the program being debugged.\n\
1042 You may only use supersets of ASCII for your host character set; GDB does\n\
1043 not support any others.\n\
1044 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1045 /* Note that the sfunc below needs to set
1046 target_charset_name, because the 'set
1047 charset' command sets two variables. */
1048 set_charset_sfunc,
1049 show_charset,
1050 &setlist, &showlist);
1051
1052 add_setshow_enum_cmd ("host-charset", class_support,
1053 charset_enum, &host_charset_name, _("\
1054 Set the host character set."), _("\
1055 Show the host character set."), _("\
1056 The `host character set' is the one used by the system GDB is running on.\n\
1057 You may only use supersets of ASCII for your host character set; GDB does\n\
1058 not support any others.\n\
1059 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1060 set_host_charset_sfunc,
1061 show_host_charset_name,
1062 &setlist, &showlist);
1063
1064 add_setshow_enum_cmd ("target-charset", class_support,
1065 charset_enum, &target_charset_name, _("\
1066 Set the target character set."), _("\
1067 Show the target character set."), _("\
1068 The `target character set' is the one used by the program being debugged.\n\
1069 GDB translates characters and strings between the host and target\n\
1070 character sets as needed.\n\
1071 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1072 set_target_charset_sfunc,
1073 show_target_charset_name,
1074 &setlist, &showlist);
1075
1076 add_setshow_enum_cmd ("target-wide-charset", class_support,
1077 charset_enum, &target_wide_charset_name,
1078 _("\
1079 Set the target wide character set."), _("\
1080 Show the target wide character set."), _("\
1081 The `target wide character set' is the one used by the program being debugged.\
1082 \nIn particular it is the encoding used by `wchar_t'.\n\
1083 GDB translates characters and strings between the host and target\n\
1084 character sets as needed.\n\
1085 To see a list of the character sets GDB supports, type\n\
1086 `set target-wide-charset'<TAB>"),
1087 set_target_wide_charset_sfunc,
1088 show_target_wide_charset_name,
1089 &setlist, &showlist);
1090 }