*** empty log message ***

[binutils-gdb.git] / gdb / c-lang.c
diff --git a/gdb/c-lang.c b/gdb/c-lang.c

index 067e42985afa60aa365b5232b35751840854aa5c..40c417256bf9f475b64f17079cdf9b0433aae647 100644 (file)
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -1,7 +1,7 @@
  /* C language support routines for GDB, the GNU debugger.
  
     Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2002, 2003,
-   2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+   2004, 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
  
     This file is part of GDB.
  
@@ -33,48 +33,334 @@
  #include "demangle.h"
  #include "cp-abi.h"
  #include "cp-support.h"
+#include "gdb_obstack.h"
+#include <ctype.h>
  
  extern void _initialize_c_language (void);
-static void c_emit_char (int c, struct ui_file * stream, int quoter);
  
-/* Print the character C on STREAM as part of the contents of a literal
-   string whose delimiter is QUOTER.  Note that that format for printing
-   characters and strings is language specific. */
+/* Given a C string type, STR_TYPE, return the corresponding target
+   character set name.  */
  
-static void
-c_emit_char (int c, struct ui_file *stream, int quoter)
+static const char *
+charset_for_string_type (enum c_string_type str_type,
+                        struct gdbarch *gdbarch)
  {
-  const char *escape;
-  int host_char;
+  switch (str_type & ~C_CHAR)
+    {
+    case C_STRING:
+      return target_charset (gdbarch);
+    case C_WIDE_STRING:
+      return target_wide_charset (gdbarch);
+    case C_STRING_16:
+      /* FIXME: UTF-16 is not always correct.  */
+      if (gdbarch_byte_order (gdbarch) == BFD_ENDIAN_BIG)
+       return "UTF-16BE";
+      else
+       return "UTF-16LE";
+    case C_STRING_32:
+      /* FIXME: UTF-32 is not always correct.  */
+      if (gdbarch_byte_order (gdbarch) == BFD_ENDIAN_BIG)
+       return "UTF-32BE";
+      else
+       return "UTF-32LE";
+    }
+  internal_error (__FILE__, __LINE__, "unhandled c_string_type");
+}
  
-  c &= 0xFF;                   /* Avoid sign bit follies */
+/* Classify ELTTYPE according to what kind of character it is.  Return
+   the enum constant representing the character type.  If ENCODING is
+   non-NULL, also set *ENCODING to the name of the character set used
+   for characters of this type on GDBARCH (including its byte order).  */
+
+static enum c_string_type
+classify_type (struct type *elttype, struct gdbarch *gdbarch,
+              const char **encoding)
+{
+  enum c_string_type result;
  
-  escape = c_target_char_has_backslash_escape (c);
-  if (escape)
+  /* We loop because ELTTYPE may be a typedef, and we want to
+     successively peel each typedef until we reach a type we
+     understand.  We don't use CHECK_TYPEDEF because that will strip
+     all typedefs at once -- but in C, wchar_t is itself a typedef, so
+     that would do the wrong thing.  */
+  while (elttype)
      {
-      if (quoter == '"' && strcmp (escape, "0") == 0)
-       /* Print nulls embedded in double quoted strings as \000 to
-          prevent ambiguity.  */
-       fprintf_filtered (stream, "\\000");
+      char *name = TYPE_NAME (elttype);
+
+      if (TYPE_CODE (elttype) == TYPE_CODE_CHAR || !name)
+       {
+         result = C_CHAR;
+         goto done;
+       }
+
+      if (!strcmp (name, "wchar_t"))
+       {
+         result = C_WIDE_CHAR;
+         goto done;
+       }
+
+      if (!strcmp (name, "char16_t"))
+       {
+         result = C_CHAR_16;
+         goto done;
+       }
+
+      if (!strcmp (name, "char32_t"))
+       {
+         result = C_CHAR_32;
+         goto done;
+       }
+
+      if (TYPE_CODE (elttype) != TYPE_CODE_TYPEDEF)
+       break;
+
+      /* Call for side effects.  */
+      check_typedef (elttype);
+
+      if (TYPE_TARGET_TYPE (elttype))
+       elttype = TYPE_TARGET_TYPE (elttype);
        else
-       fprintf_filtered (stream, "\\%s", escape);
+       {
+         /* Perhaps check_typedef did not update the target type.  In
+            this case, force the lookup again and hope it works out.
+            It never will for C, but it might for C++.  */
+         CHECK_TYPEDEF (elttype);
+       }
      }
-  else if (target_char_to_host (c, &host_char)
-           && host_char_print_literally (host_char))
+
+  /* Punt.  */
+  result = C_CHAR;
+
+ done:
+  if (encoding)
+    *encoding = charset_for_string_type (result, gdbarch);
+
+  return result;
+}
+
+/* Return true if print_wchar can display W without resorting to a
+   numeric escape, false otherwise.  */
+
+static int
+wchar_printable (gdb_wchar_t w)
+{
+  return (gdb_iswprint (w)
+         || w == LCST ('\a') || w == LCST ('\b')
+         || w == LCST ('\f') || w == LCST ('\n')
+         || w == LCST ('\r') || w == LCST ('\t')
+         || w == LCST ('\v'));
+}
+
+/* A helper function that converts the contents of STRING to wide
+   characters and then appends them to OUTPUT.  */
+
+static void
+append_string_as_wide (const char *string, struct obstack *output)
+{
+  for (; *string; ++string)
+    {
+      gdb_wchar_t w = gdb_btowc (*string);
+      obstack_grow (output, &w, sizeof (gdb_wchar_t));
+    }
+}
+
+/* Print a wide character W to OUTPUT.  ORIG is a pointer to the
+   original (target) bytes representing the character, ORIG_LEN is the
+   number of valid bytes.  WIDTH is the number of bytes in a base
+   character of the type, and BYTE_ORDER is the byte order of ORIG.
+   OUTPUT is an obstack to which wide characters are emitted.  QUOTER
+   is a (narrow) character indicating the style of quotes surrounding
+   the character to be printed.  NEED_ESCAPEP points to an in/out flag
+   which is used to track numeric escapes across calls.  */
+
+static void
+print_wchar (gdb_wint_t w, const gdb_byte *orig, int orig_len,
+            int width, enum bfd_endian byte_order, struct obstack *output,
+            int quoter, int *need_escapep)
+{
+  int need_escape = *need_escapep;
+
+  *need_escapep = 0;
+  if (gdb_iswprint (w) && (!need_escape || (!gdb_iswdigit (w)
+                                           && w != LCST ('8')
+                                           && w != LCST ('9'))))
      {
-      if (host_char == '\\' || host_char == quoter)
-        fputs_filtered ("\\", stream);
-      fprintf_filtered (stream, "%c", host_char);
+      gdb_wchar_t wchar = w;
+
+      if (w == gdb_btowc (quoter) || w == LCST ('\\'))
+       obstack_grow_wstr (output, LCST ("\\"));
+      obstack_grow (output, &wchar, sizeof (gdb_wchar_t));
      }
    else
-    fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
+    {
+      switch (w)
+       {
+       case LCST ('\a'):
+         obstack_grow_wstr (output, LCST ("\\a"));
+         break;
+       case LCST ('\b'):
+         obstack_grow_wstr (output, LCST ("\\b"));
+         break;
+       case LCST ('\f'):
+         obstack_grow_wstr (output, LCST ("\\f"));
+         break;
+       case LCST ('\n'):
+         obstack_grow_wstr (output, LCST ("\\n"));
+         break;
+       case LCST ('\r'):
+         obstack_grow_wstr (output, LCST ("\\r"));
+         break;
+       case LCST ('\t'):
+         obstack_grow_wstr (output, LCST ("\\t"));
+         break;
+       case LCST ('\v'):
+         obstack_grow_wstr (output, LCST ("\\v"));
+         break;
+       default:
+         {
+           int i;
+
+           for (i = 0; i + width <= orig_len; i += width)
+             {
+               char octal[30];
+               ULONGEST value;
+
+               value = extract_unsigned_integer (&orig[i], width, byte_order);
+               /* If the value fits in 3 octal digits, print it that
+                  way.  Otherwise, print it as a hex escape.  */
+               if (value <= 0777)
+                 sprintf (octal, "\\%.3o", (int) (value & 0777));
+               else
+                 sprintf (octal, "\\x%lx", (long) value);
+               append_string_as_wide (octal, output);
+             }
+           /* If we somehow have extra bytes, print them now.  */
+           while (i < orig_len)
+             {
+               char octal[5];
+
+               sprintf (octal, "\\%.3o", orig[i] & 0xff);
+               append_string_as_wide (octal, output);
+               ++i;
+             }
+
+           *need_escapep = 1;
+         }
+         break;
+       }
+    }
  }
  
+/* Print the character C of type TYPE on STREAM as part of the contents
+   of a literal string whose delimiter is QUOTER.  Note that the format
+   for printing characters and strings is language specific.  */
+
  void
-c_printchar (int c, struct ui_file *stream)
+c_emit_char (int c, struct type *type,
+            struct ui_file *stream, int quoter)
  {
+  enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type));
+  struct obstack wchar_buf, output;
+  struct cleanup *cleanups;
+  const char *encoding;
+  gdb_byte *buf;
+  struct wchar_iterator *iter;
+  int need_escape = 0;
+
+  classify_type (type, get_type_arch (type), &encoding);
+
+  buf = alloca (TYPE_LENGTH (type));
+  pack_long (buf, type, c);
+
+  iter = make_wchar_iterator (buf, TYPE_LENGTH (type), encoding,
+                             TYPE_LENGTH (type));
+  cleanups = make_cleanup_wchar_iterator (iter);
+
+  /* This holds the printable form of the wchar_t data.  */
+  obstack_init (&wchar_buf);
+  make_cleanup_obstack_free (&wchar_buf);
+
+  while (1)
+    {
+      int num_chars;
+      gdb_wchar_t *chars;
+      const gdb_byte *buf;
+      size_t buflen;
+      int print_escape = 1;
+      enum wchar_iterate_result result;
+
+      num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen);
+      if (num_chars < 0)
+       break;
+      if (num_chars > 0)
+       {
+         /* If all characters are printable, print them.  Otherwise,
+            we're going to have to print an escape sequence.  We
+            check all characters because we want to print the target
+            bytes in the escape sequence, and we don't know character
+            boundaries there.  */
+         int i;
+
+         print_escape = 0;
+         for (i = 0; i < num_chars; ++i)
+           if (!wchar_printable (chars[i]))
+             {
+               print_escape = 1;
+               break;
+             }
+
+         if (!print_escape)
+           {
+             for (i = 0; i < num_chars; ++i)
+               print_wchar (chars[i], buf, buflen, TYPE_LENGTH (type),
+                            byte_order, &wchar_buf, quoter, &need_escape);
+           }
+       }
+
+      /* This handles the NUM_CHARS == 0 case as well.  */
+      if (print_escape)
+       print_wchar (gdb_WEOF, buf, buflen, TYPE_LENGTH (type), byte_order,
+                    &wchar_buf, quoter, &need_escape);
+    }
+
+  /* The output in the host encoding.  */
+  obstack_init (&output);
+  make_cleanup_obstack_free (&output);
+
+  convert_between_encodings (INTERMEDIATE_ENCODING, host_charset (),
+                            obstack_base (&wchar_buf),
+                            obstack_object_size (&wchar_buf),
+                            1, &output, translit_char);
+  obstack_1grow (&output, '\0');
+
+  fputs_filtered (obstack_base (&output), stream);
+
+  do_cleanups (cleanups);
+}
+
+void
+c_printchar (int c, struct type *type, struct ui_file *stream)
+{
+  enum c_string_type str_type;
+
+  str_type = classify_type (type, get_type_arch (type), NULL);
+  switch (str_type)
+    {
+    case C_CHAR:
+      break;
+    case C_WIDE_CHAR:
+      fputc_filtered ('L', stream);
+      break;
+    case C_CHAR_16:
+      fputc_filtered ('u', stream);
+      break;
+    case C_CHAR_32:
+      fputc_filtered ('U', stream);
+      break;
+    }
+
    fputc_filtered ('\'', stream);
-  LA_EMIT_CHAR (c, stream, '\'');
+  LA_EMIT_CHAR (c, type, stream, '\'');
    fputc_filtered ('\'', stream);
  }
  
@@ -85,87 +371,217 @@ c_printchar (int c, struct ui_file *stream)
     printing LENGTH characters, or if FORCE_ELLIPSES.  */
  
  void
-c_printstr (struct ui_file *stream, const gdb_byte *string,
-           unsigned int length, int width, int force_ellipses,
+c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string,
+           unsigned int length, const char *user_encoding, int force_ellipses,
             const struct value_print_options *options)
  {
+  enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type));
    unsigned int i;
    unsigned int things_printed = 0;
    int in_quotes = 0;
    int need_comma = 0;
+  int width = TYPE_LENGTH (type);
+  struct obstack wchar_buf, output;
+  struct cleanup *cleanup;
+  enum c_string_type str_type;
+  const char *type_encoding;
+  const char *encoding;
+  struct wchar_iterator *iter;
+  int finished = 0;
+  int need_escape = 0;
+
+  if (length == -1)
+    {
+      unsigned long current_char = 1;
+
+      for (i = 0; current_char; ++i)
+       {
+         QUIT;
+         current_char = extract_unsigned_integer (string + i * width,
+                                                  width, byte_order);
+       }
+      length = i;
+    }
  
    /* If the string was not truncated due to `set print elements', and
       the last byte of it is a null, we don't print that, in traditional C
       style.  */
    if (!force_ellipses
        && length > 0
-      && (extract_unsigned_integer (string + (length - 1) * width, width)
-          == '\0'))
+      && (extract_unsigned_integer (string + (length - 1) * width,
+                                   width, byte_order) == 0))
      length--;
  
+  str_type = (classify_type (type, get_type_arch (type), &type_encoding)
+             & ~C_CHAR);
+  switch (str_type)
+    {
+    case C_STRING:
+      break;
+    case C_WIDE_STRING:
+      fputs_filtered ("L", stream);
+      break;
+    case C_STRING_16:
+      fputs_filtered ("u", stream);
+      break;
+    case C_STRING_32:
+      fputs_filtered ("U", stream);
+      break;
+    }
+
+  encoding = (user_encoding && *user_encoding) ? user_encoding : type_encoding;
+
    if (length == 0)
      {
        fputs_filtered ("\"\"", stream);
        return;
      }
  
-  for (i = 0; i < length && things_printed < options->print_max; ++i)
+  /* Arrange to iterate over the characters, in wchar_t form.  */
+  iter = make_wchar_iterator (string, length * width, encoding, width);
+  cleanup = make_cleanup_wchar_iterator (iter);
+
+  /* WCHAR_BUF is the obstack we use to represent the string in
+     wchar_t form.  */
+  obstack_init (&wchar_buf);
+  make_cleanup_obstack_free (&wchar_buf);
+
+  while (!finished && things_printed < options->print_max)
      {
-      /* Position of the character we are examining
-         to see whether it is repeated.  */
-      unsigned int rep1;
-      /* Number of repetitions we have detected so far.  */
-      unsigned int reps;
-      unsigned long current_char;
+      int num_chars;
+      enum wchar_iterate_result result;
+      gdb_wchar_t *chars;
+      const gdb_byte *buf;
+      size_t buflen;
  
        QUIT;
  
        if (need_comma)
         {
-         fputs_filtered (", ", stream);
+         obstack_grow_wstr (&wchar_buf, LCST (", "));
           need_comma = 0;
         }
  
-      current_char = extract_unsigned_integer (string + i * width, width);
+      num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen);
+      /* We only look at repetitions when we were able to convert a
+        single character in isolation.  This makes the code simpler
+        and probably does the sensible thing in the majority of
+        cases.  */
+      while (num_chars == 1 && things_printed < options->print_max)
+       {
+         /* Count the number of repetitions.  */
+         unsigned int reps = 0;
+         gdb_wchar_t current_char = chars[0];
+         const gdb_byte *orig_buf = buf;
+         int orig_len = buflen;
+
+         if (need_comma)
+           {
+             obstack_grow_wstr (&wchar_buf, LCST (", "));
+             need_comma = 0;
+           }
  
-      rep1 = i + 1;
-      reps = 1;
-      while (rep1 < length
-            && extract_unsigned_integer (string + rep1 * width, width)
-            == current_char)
+         while (num_chars == 1 && current_char == chars[0])
+           {
+             num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen);
+             ++reps;
+           }
+
+         /* Emit CURRENT_CHAR according to the repetition count and
+            options.  */
+         if (reps > options->repeat_count_threshold)
+           {
+             if (in_quotes)
+               {
+                 if (options->inspect_it)
+                   obstack_grow_wstr (&wchar_buf, LCST ("\\\", "));
+                 else
+                   obstack_grow_wstr (&wchar_buf, LCST ("\", "));
+                 in_quotes = 0;
+               }
+             obstack_grow_wstr (&wchar_buf, LCST ("'"));
+             need_escape = 0;
+             print_wchar (current_char, orig_buf, orig_len, width,
+                          byte_order, &wchar_buf, '\'', &need_escape);
+             obstack_grow_wstr (&wchar_buf, LCST ("'"));
+             {
+               /* Painful gyrations.  */
+               int j;
+               char *s = xstrprintf (_(" <repeats %u times>"), reps);
+
+               for (j = 0; s[j]; ++j)
+                 {
+                   gdb_wchar_t w = gdb_btowc (s[j]);
+                   obstack_grow (&wchar_buf, &w, sizeof (gdb_wchar_t));
+                 }
+               xfree (s);
+             }
+             things_printed += options->repeat_count_threshold;
+             need_comma = 1;
+           }
+         else
+           {
+             /* Saw the character one or more times, but fewer than
+                the repetition threshold.  */
+             if (!in_quotes)
+               {
+                 if (options->inspect_it)
+                   obstack_grow_wstr (&wchar_buf, LCST ("\\\""));
+                 else
+                   obstack_grow_wstr (&wchar_buf, LCST ("\""));
+                 in_quotes = 1;
+                 need_escape = 0;
+               }
+
+             while (reps-- > 0)
+               {
+                 print_wchar (current_char, orig_buf, orig_len, width,
+                              byte_order, &wchar_buf, '"', &need_escape);
+                 ++things_printed;
+               }
+           }
+       }
+
+      /* NUM_CHARS and the other outputs from wchar_iterate are valid
+        here regardless of which branch was taken above.  */
+      if (num_chars < 0)
         {
-         ++rep1;
-         ++reps;
+         /* Hit EOF.  */
+         finished = 1;
+         break;
         }
  
-      if (reps > options->repeat_count_threshold)
+      switch (result)
         {
-         if (in_quotes)
+       case wchar_iterate_invalid:
+         if (!in_quotes)
             {
               if (options->inspect_it)
-               fputs_filtered ("\\\", ", stream);
+               obstack_grow_wstr (&wchar_buf, LCST ("\\\""));
               else
-               fputs_filtered ("\", ", stream);
-             in_quotes = 0;
+               obstack_grow_wstr (&wchar_buf, LCST ("\""));
+             in_quotes = 1;
             }
-         LA_PRINT_CHAR (current_char, stream);
-         fprintf_filtered (stream, _(" <repeats %u times>"), reps);
-         i = rep1 - 1;
-         things_printed += options->repeat_count_threshold;
-         need_comma = 1;
-       }
-      else
-       {
-         if (!in_quotes)
+         need_escape = 0;
+         print_wchar (gdb_WEOF, buf, buflen, width, byte_order, &wchar_buf,
+                      '"', &need_escape);
+         break;
+
+       case wchar_iterate_incomplete:
+         if (in_quotes)
             {
               if (options->inspect_it)
-               fputs_filtered ("\\\"", stream);
+               obstack_grow_wstr (&wchar_buf, LCST ("\\\","));
               else
-               fputs_filtered ("\"", stream);
-             in_quotes = 1;
+               obstack_grow_wstr (&wchar_buf, LCST ("\","));
+             in_quotes = 0;
             }
-         LA_EMIT_CHAR (current_char, stream, '"');
-         ++things_printed;
+         obstack_grow_wstr (&wchar_buf, LCST (" <incomplete sequence "));
+         print_wchar (gdb_WEOF, buf, buflen, width, byte_order, &wchar_buf,
+                      0, &need_escape);
+         obstack_grow_wstr (&wchar_buf, LCST (">"));
+         finished = 1;
+         break;
         }
      }
  
@@ -173,120 +589,462 @@ c_printstr (struct ui_file *stream, const gdb_byte *string,
    if (in_quotes)
      {
        if (options->inspect_it)
-       fputs_filtered ("\\\"", stream);
+       obstack_grow_wstr (&wchar_buf, LCST ("\\\""));
        else
-       fputs_filtered ("\"", stream);
+       obstack_grow_wstr (&wchar_buf, LCST ("\""));
      }
  
-  if (force_ellipses || i < length)
-    fputs_filtered ("...", stream);
+  if (force_ellipses || !finished)
+    obstack_grow_wstr (&wchar_buf, LCST ("..."));
+
+  /* OUTPUT is where we collect `char's for printing.  */
+  obstack_init (&output);
+  make_cleanup_obstack_free (&output);
+
+  convert_between_encodings (INTERMEDIATE_ENCODING, host_charset (),
+                            obstack_base (&wchar_buf),
+                            obstack_object_size (&wchar_buf),
+                            1, &output, translit_char);
+  obstack_1grow (&output, '\0');
+
+  fputs_filtered (obstack_base (&output), stream);
+
+  do_cleanups (cleanup);
  }
+
+/* Obtain a C string from the inferior, storing it in a newly allocated
+   buffer in *BUFFER, which should be freed by the caller.  If the
+   in- and out-parameter *LENGTH is specified as -1, the string is read
+   until a null character of the appropriate width is found; otherwise
+   the string is read to the length of characters specified.
+   The size of a character is determined by the length of the target
+   type of the pointer or array.  If VALUE is an array with a known
+   length, the function will not read past the end of the array.
+   On completion, *LENGTH will be set to the size of the string read in
+   characters (if a length of -1 is specified, the length returned
+   will not include the null character), *CHAR_TYPE to the element type,
+   and *CHARSET to the name of the character set for that type.  */
+
+void
+c_get_string (struct value *value, gdb_byte **buffer, int *length,
+             struct type **char_type, const char **charset)
+{
+  int err, width;
+  unsigned int fetchlimit;
+  struct type *type = check_typedef (value_type (value));
+  struct type *element_type = TYPE_TARGET_TYPE (type);
+  int req_length = *length;
+  enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type));
+  enum c_string_type kind;
+
+  if (element_type == NULL)
+    goto error;
+
+  if (TYPE_CODE (type) == TYPE_CODE_ARRAY)
+    {
+      /* If we know the size of the array, we can use it as a limit on the
+        number of characters to be fetched.  */
+      if (TYPE_NFIELDS (type) == 1
+         && TYPE_CODE (TYPE_FIELD_TYPE (type, 0)) == TYPE_CODE_RANGE)
+       {
+         LONGEST low_bound, high_bound;
+
+         get_discrete_bounds (TYPE_FIELD_TYPE (type, 0),
+                              &low_bound, &high_bound);
+         fetchlimit = high_bound - low_bound + 1;
+       }
+      else
+       fetchlimit = UINT_MAX;
+    }
+  else if (TYPE_CODE (type) == TYPE_CODE_PTR)
+    fetchlimit = UINT_MAX;
+  else
+    /* We work only with arrays and pointers.  */
+    goto error;
+
+  if (! c_textual_element_type (element_type, 0))
+    goto error;
+  kind = classify_type (element_type,
+                       get_type_arch (element_type),
+                       charset);
+  width = TYPE_LENGTH (element_type);
+
+  /* If the string lives in GDB's memory instead of the inferior's, then we
+     just need to copy it to BUFFER.  Also, since such strings are arrays
+     with known size, FETCHLIMIT will hold the size of the array.  */
+  if ((VALUE_LVAL (value) == not_lval
+       || VALUE_LVAL (value) == lval_internalvar)
+      && fetchlimit != UINT_MAX)
+    {
+      int i;
+      const gdb_byte *contents = value_contents (value);
+
+      /* If a length is specified, use that.  */
+      if (*length >= 0)
+       i  = *length;
+      else
+       /* Otherwise, look for a null character.  */
+       for (i = 0; i < fetchlimit; i++)
+         if (extract_unsigned_integer (contents + i * width, width,
+                                       byte_order) == 0)
+           break;
+  
+      /* I is now either a user-defined length, the number of non-null
+        characters, or FETCHLIMIT.  */
+      *length = i * width;
+      *buffer = xmalloc (*length);
+      memcpy (*buffer, contents, *length);
+      err = 0;
+    }
+  else
+    {
+      err = read_string (value_as_address (value), *length, width, fetchlimit,
+                        byte_order, buffer, length);
+      if (err)
+       {
+         xfree (*buffer);
+         error (_("Error reading string from inferior: %s"),
+                safe_strerror (err));
+       }
+    }
+
+  /* If the LENGTH was specified as -1, we want to return the string
+     length up to the terminating null character.  If an actual length
+     was specified, we want to return the length of exactly what was
+     read.  */
+  if (req_length == -1)
+    /* If the last character is null, subtract it from LENGTH.  */
+    if (*length > 0
+       && extract_unsigned_integer (*buffer + *length - width, width,
+                                    byte_order) == 0)
+      *length -= width;
+  
+  /* The read_string function will return the number of bytes read.
+     If length returned from read_string was > 0, return the number of
+     characters read by dividing the number of bytes by width.  */
+  if (*length != 0)
+     *length = *length / width;
+
+  *char_type = element_type;
+
+  return;
+
+ error:
+  {
+    char *type_str;
+
+    type_str = type_to_string (type);
+    if (type_str)
+      {
+       make_cleanup (xfree, type_str);
+       error (_("Trying to read string with inappropriate type `%s'."),
+              type_str);
+      }
+    else
+      error (_("Trying to read string with inappropriate type."));
+  }
+}
+
  \f
-/* Preprocessing and parsing C and C++ expressions.  */
+/* Evaluating C and C++ expressions.  */
+
+/* Convert a UCN.  The digits of the UCN start at P and extend no
+   farther than LIMIT.  DEST_CHARSET is the name of the character set
+   into which the UCN should be converted.  The results are written to
+   OUTPUT.  LENGTH is the maximum length of the UCN, either 4 or 8.
+   Returns a pointer to just after the final digit of the UCN.  */
  
+static char *
+convert_ucn (char *p, char *limit, const char *dest_charset,
+            struct obstack *output, int length)
+{
+  unsigned long result = 0;
+  gdb_byte data[4];
+  int i;
  
-/* When we find that lexptr (the global var defined in parse.c) is
-   pointing at a macro invocation, we expand the invocation, and call
-   scan_macro_expansion to save the old lexptr here and point lexptr
-   into the expanded text.  When we reach the end of that, we call
-   end_macro_expansion to pop back to the value we saved here.  The
-   macro expansion code promises to return only fully-expanded text,
-   so we don't need to "push" more than one level.
+  for (i = 0; i < length && p < limit && isxdigit (*p); ++i, ++p)
+    result = (result << 4) + host_hex_value (*p);
  
-   This is disgusting, of course.  It would be cleaner to do all macro
-   expansion beforehand, and then hand that to lexptr.  But we don't
-   really know where the expression ends.  Remember, in a command like
+  for (i = 3; i >= 0; --i)
+    {
+      data[i] = result & 0xff;
+      result >>= 8;
+    }
  
-     (gdb) break *ADDRESS if CONDITION
+  convert_between_encodings ("UTF-32BE", dest_charset, data, 4, 4, output,
+                            translit_none);
  
-   we evaluate ADDRESS in the scope of the current frame, but we
-   evaluate CONDITION in the scope of the breakpoint's location.  So
-   it's simply wrong to try to macro-expand the whole thing at once.  */
-static char *macro_original_text;
-static char *macro_expanded_text;
+  return p;
+}
  
+/* Emit a character, VALUE, which was specified numerically, to
+   OUTPUT.  TYPE is the target character type.  */
  
-void
-scan_macro_expansion (char *expansion)
+static void
+emit_numeric_character (struct type *type, unsigned long value,
+                       struct obstack *output)
+{
+  gdb_byte *buffer;
+
+  buffer = alloca (TYPE_LENGTH (type));
+  pack_long (buffer, type, value);
+  obstack_grow (output, buffer, TYPE_LENGTH (type));
+}
+
+/* Convert an octal escape sequence.  TYPE is the target character
+   type.  The digits of the escape sequence begin at P and extend no
+   farther than LIMIT.  The result is written to OUTPUT.  Returns a
+   pointer to just after the final digit of the escape sequence.  */
+
+static char *
+convert_octal (struct type *type, char *p, char *limit, struct obstack *output)
  {
-  /* We'd better not be trying to push the stack twice.  */
-  gdb_assert (! macro_original_text);
-  gdb_assert (! macro_expanded_text);
+  int i;
+  unsigned long value = 0;
+
+  for (i = 0;
+       i < 3 && p < limit && isdigit (*p) && *p != '8' && *p != '9';
+       ++i)
+    {
+      value = 8 * value + host_hex_value (*p);
+      ++p;
+    }
  
-  /* Save the old lexptr value, so we can return to it when we're done
-     parsing the expanded text.  */
-  macro_original_text = lexptr;
-  lexptr = expansion;
+  emit_numeric_character (type, value, output);
  
-  /* Save the expanded text, so we can free it when we're finished.  */
-  macro_expanded_text = expansion;
+  return p;
  }
  
+/* Convert a hex escape sequence.  TYPE is the target character type.
+   The digits of the escape sequence begin at P and extend no farther
+   than LIMIT.  The result is written to OUTPUT.  Returns a pointer to
+   just after the final digit of the escape sequence.  */
  
-int
-scanning_macro_expansion (void)
+static char *
+convert_hex (struct type *type, char *p, char *limit, struct obstack *output)
  {
-  return macro_original_text != 0;
+  unsigned long value = 0;
+
+  while (p < limit && isxdigit (*p))
+    {
+      value = 16 * value + host_hex_value (*p);
+      ++p;
+    }
+
+  emit_numeric_character (type, value, output);
+
+  return p;
  }
  
+#define ADVANCE                                        \
+  do {                                         \
+    ++p;                                       \
+    if (p == limit)                            \
+      error (_("Malformed escape sequence"));  \
+  } while (0)
+
+/* Convert the escape sequence whose backslash is at *P, and which does
+   not extend past LIMIT, to a target format appended to OUTPUT.  TYPE
+   is the target character type; DEST_CHARSET names the target charset.
+   Handles \\, \x, octal, \u and \U; error is called when the input ends
+   early or \x, \u or \U lacks hex digits.  Any other character is left
+   unconsumed.  Returns a pointer just past what was consumed.  */
  
-void 
-finished_macro_expansion (void)
+static char *
+convert_escape (struct type *type, const char *dest_charset,
+               char *p, char *limit, struct obstack *output)
  {
-  /* There'd better be something to pop back to, and we better have
-     saved a pointer to the start of the expanded text.  */
-  gdb_assert (macro_original_text);
-  gdb_assert (macro_expanded_text);
-
-  /* Pop back to the original text.  */
-  lexptr = macro_original_text;
-  macro_original_text = 0;
-
-  /* Free the expanded text.  */
-  xfree (macro_expanded_text);
-  macro_expanded_text = 0;
+  /* Skip the backslash.  */
+  ADVANCE;
+
+  switch (*p)
+    {
+    case '\\':
+      obstack_1grow (output, '\\');  /* NOTE(review): 1 byte for any TYPE?  */
+      ++p;
+      break;
+
+    case 'x':
+      ADVANCE;
+      if (!isxdigit ((unsigned char) *p))  /* Cast: negative char is UB.  */
+       error (_("\\x used with no following hex digits."));
+      p = convert_hex (type, p, limit, output);
+      break;
+
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+      p = convert_octal (type, p, limit, output);
+      break;
+
+    case 'u':
+    case 'U':
+      {
+       int length = *p == 'u' ? 4 : 8;
+
+       ADVANCE;
+       if (!isxdigit ((unsigned char) *p))
+         error (_("\\u used with no following hex digits"));
+       p = convert_ucn (p, limit, dest_charset, output, length);
+      }
+    }
+
+  return p;
  }
  
+/* Convert one string from a (C-specific) OP_STRING list to a target
+   string, appending the result to OUTPUT.  DATA is the input, LEN bytes
+   long.  Runs of ordinary characters are converted from the host
+   charset to DEST_CHARSET (the target character set name); each escape
+   goes through convert_escape with TYPE, the target character type.  */
  
  static void
-scan_macro_cleanup (void *dummy)
+parse_one_string (struct obstack *output, char *data, int len,
+                 const char *dest_charset, struct type *type)
  {
-  if (macro_original_text)
-    finished_macro_expansion ();
-}
+  char *limit;         /* One past the last byte of DATA.  */
  
+  limit = data + len;
  
-/* We set these global variables before calling c_parse, to tell it
-   how it to find macro definitions for the expression at hand.  */
-macro_lookup_ftype *expression_macro_lookup_func;
-void *expression_macro_lookup_baton;
+  while (data < limit)
+    {
+      char *p = data;
  
+      /* Look for next escape, or the end of the input.  */
+      while (p < limit && *p != '\\')
+       ++p;
+      /* If we saw a run of characters, convert them all.  */
+      if (p > data)
+       convert_between_encodings (host_charset (), dest_charset,
+                                  data, p - data, 1, output, translit_none);
+      /* If we saw an escape, convert it.  */
+      if (p < limit)
+       p = convert_escape (type, dest_charset, p, limit, output);
+      data = p;                /* Resume after what was just converted.  */
+    }
+}
  
-static int
-c_preprocess_and_parse (void)
+/* Expression evaluator for the C language family.  Only OP_STRING is
+   handled here; every other opcode is passed on to
+   evaluate_subexp_standard, which also documents the arguments.  */
+
+static struct value *
+evaluate_subexp_c (struct type *expect_type, struct expression *exp,
+                  int *pos, enum noside noside)
  {
-  /* Set up a lookup function for the macro expander.  */
-  struct macro_scope *scope = 0;
-  struct cleanup *back_to = make_cleanup (free_current_contents, &scope);
+  enum exp_opcode op = exp->elts[*pos].opcode;
  
-  if (expression_context_block)
-    scope = sal_macro_scope (find_pc_line (expression_context_pc, 0));
-  else
-    scope = default_macro_scope ();
-  if (! scope)
-    scope = user_macro_scope ();
+  switch (op)
+    {
+    case OP_STRING:
+      {
+       int oplen, limit;
+       struct type *type;
+       struct obstack output;  /* Collects the converted bytes.  */
+       struct cleanup *cleanup;
+       struct value *result;
+       enum c_string_type dest_type;
+       const char *dest_charset;
  
-  expression_macro_lookup_func = standard_macro_lookup;
-  expression_macro_lookup_baton = (void *) scope;
+       obstack_init (&output);
+       cleanup = make_cleanup_obstack_free (&output);
  
-  gdb_assert (! macro_original_text);
-  make_cleanup (scan_macro_cleanup, 0);
+       ++*pos;
+       oplen = longest_to_int (exp->elts[*pos].longconst);
  
-  {
-    int result = c_parse ();
-    do_cleanups (back_to);
-    return result;
-  }
+       ++*pos;
+       limit = *pos + BYTES_TO_EXP_ELEM (oplen + 1);
+       dest_type
+         = (enum c_string_type) longest_to_int (exp->elts[*pos].longconst);
+       switch (dest_type & ~C_CHAR)
+         {
+         case C_STRING:
+           type = language_string_char_type (exp->language_defn,
+                                             exp->gdbarch);
+           break;
+         case C_WIDE_STRING:
+           type = lookup_typename (exp->language_defn, exp->gdbarch,
+                                   "wchar_t", NULL, 0);
+           break;
+         case C_STRING_16:
+           type = lookup_typename (exp->language_defn, exp->gdbarch,
+                                   "char16_t", NULL, 0);
+           break;
+         case C_STRING_32:
+           type = lookup_typename (exp->language_defn, exp->gdbarch,
+                                   "char32_t", NULL, 0);
+           break;
+         default:
+           internal_error (__FILE__, __LINE__, "unhandled c_string_type");
+         }
+
+       /* Ensure TYPE_LENGTH is valid for TYPE.  */
+       check_typedef (type);
+
+       dest_charset = charset_for_string_type (dest_type, exp->gdbarch);
+
+       ++*pos;
+       while (*pos < limit)
+         {
+           int len;
+
+           len = longest_to_int (exp->elts[*pos].longconst);
+
+           ++*pos;
+           if (noside != EVAL_SKIP)
+             parse_one_string (&output, &exp->elts[*pos].string, len,
+                               dest_charset, type);
+           *pos += BYTES_TO_EXP_ELEM (len);  /* Step past the string data.  */
+         }
+
+       /* Skip the trailing length and opcode.  */
+       *pos += 2;
+
+       if (noside == EVAL_SKIP)
+         {
+           /* Return a dummy value of the appropriate type.  */
+           if ((dest_type & C_CHAR) != 0)
+             result = allocate_value (type);
+           else
+             result = value_cstring ("", 0, type);
+           do_cleanups (cleanup);
+           return result;
+         }
+
+       if ((dest_type & C_CHAR) != 0)
+         {
+           LONGEST value;
+           /* The converted data must be exactly one TYPE in size.  */
+           if (obstack_object_size (&output) != TYPE_LENGTH (type))
+             error (_("Could not convert character constant to target character set"));
+           value = unpack_long (type, obstack_base (&output));
+           result = value_from_longest (type, value);
+         }
+       else
+         {
+           int i;
+
+           /* Write the terminating character.  */
+           for (i = 0; i < TYPE_LENGTH (type); ++i)
+             obstack_1grow (&output, 0);
+           result = value_cstring (obstack_base (&output),
+                                   obstack_object_size (&output),
+                                   type);
+         }
+       do_cleanups (cleanup);
+       return result;
+      }
+      break;                   /* Not reached; both paths above return.  */
+
+    default:
+      break;
+    }
+  return evaluate_subexp_standard (expect_type, exp, pos, noside);
  }
  
  
@@ -357,6 +1115,7 @@ c_language_arch_info (struct gdbarch *gdbarch,
                       struct language_arch_info *lai)
  {
    const struct builtin_type *builtin = builtin_type (gdbarch);
+
    lai->string_char_type = builtin->builtin_char;
    lai->primitive_type_vector
      = GDBARCH_OBSTACK_CALLOC (gdbarch, nr_c_primitive_types + 1,
@@ -385,6 +1144,16 @@ c_language_arch_info (struct gdbarch *gdbarch,
    lai->bool_type_default = builtin->builtin_int;
  }
  
+const struct exp_descriptor exp_descriptor_c = 
+{                              /* Standard hooks, except the last.  */
+  print_subexp_standard,
+  operator_length_standard,
+  operator_check_standard,
+  op_name_standard,
+  dump_subexp_body_standard,
+  evaluate_subexp_c            /* C-specific; handles OP_STRING itself.  */
+};
+
  const struct language_defn c_language_defn =
  {
    "c",                         /* Language name */
@@ -394,8 +1163,8 @@ const struct language_defn c_language_defn =
    case_sensitive_on,
    array_row_major,
    macro_expansion_c,
-  &exp_descriptor_standard,
-  c_preprocess_and_parse,
+  &exp_descriptor_c,
+  c_parse,
    c_error,
    null_post_parser,
    c_printchar,                 /* Print a character constant */
@@ -419,6 +1188,7 @@ const struct language_defn c_language_defn =
    c_language_arch_info,
    default_print_array_index,
    default_pass_by_reference,
+  c_get_string,
    LANG_MAGIC
  };
  
@@ -452,6 +1222,7 @@ cplus_language_arch_info (struct gdbarch *gdbarch,
                           struct language_arch_info *lai)
  {
    const struct builtin_type *builtin = builtin_type (gdbarch);
+
    lai->string_char_type = builtin->builtin_char;
    lai->primitive_type_vector
      = GDBARCH_OBSTACK_CALLOC (gdbarch, nr_cplus_primitive_types + 1,
@@ -512,8 +1283,8 @@ const struct language_defn cplus_language_defn =
    case_sensitive_on,
    array_row_major,
    macro_expansion_c,
-  &exp_descriptor_standard,
-  c_preprocess_and_parse,
+  &exp_descriptor_c,
+  c_parse,
    c_error,
    null_post_parser,
    c_printchar,                 /* Print a character constant */
@@ -537,6 +1308,7 @@ const struct language_defn cplus_language_defn =
    cplus_language_arch_info,
    default_print_array_index,
    cp_pass_by_reference,
+  c_get_string,
    LANG_MAGIC
  };
  
@@ -549,8 +1321,8 @@ const struct language_defn asm_language_defn =
    case_sensitive_on,
    array_row_major,
    macro_expansion_c,
-  &exp_descriptor_standard,
-  c_preprocess_and_parse,
+  &exp_descriptor_c,
+  c_parse,
    c_error,
    null_post_parser,
    c_printchar,                 /* Print a character constant */
@@ -574,6 +1346,7 @@ const struct language_defn asm_language_defn =
    c_language_arch_info, /* FIXME: la_language_arch_info.  */
    default_print_array_index,
    default_pass_by_reference,
+  c_get_string,
    LANG_MAGIC
  };
  
@@ -591,8 +1364,8 @@ const struct language_defn minimal_language_defn =
    case_sensitive_on,
    array_row_major,
    macro_expansion_c,
-  &exp_descriptor_standard,
-  c_preprocess_and_parse,
+  &exp_descriptor_c,
+  c_parse,
    c_error,
    null_post_parser,
    c_printchar,                 /* Print a character constant */
@@ -616,6 +1389,7 @@ const struct language_defn minimal_language_defn =
    c_language_arch_info,
    default_print_array_index,
    default_pass_by_reference,
+  c_get_string,
    LANG_MAGIC
  };