lex.c (java_new_lexer): Initialize out_first and out_last fields.

author Tom Tromey <tromey@cygnus.com>

Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)

committer Tom Tromey <tromey@gcc.gnu.org>

Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
author Tom Tromey <tromey@cygnus.com>
Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
committer Tom Tromey <tromey@gcc.gnu.org>
Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog

index b63d7511bf0c4ab012110c19b9903bcc322e072a..99fa85432429dd7ad84d476cdf5875c739ed05f2 100644 (file)
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,9 @@
+2000-10-10  Tom Tromey  <tromey@cygnus.com>
+
+       * lex.c (java_new_lexer): Initialize out_first and out_last
+       fields.
+       * lex.h (java_lexer): Added out_buffer, out_first, out_last.
+
  2000-10-07  Alexandre Petit-Bianco  <apbianco@cygnus.com>
  
         Patch contributed by Corey Minyard.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c

index be1b1392baecabdff7075d4dfbd1e918fa6499db..8fb6a15a6fd3f877271bddae6a827e8b88739188 100644 (file)
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -219,6 +219,8 @@ java_new_lexer (finput, encoding)
      }
    lex->first = -1;
    lex->last = -1;
+  lex->out_first = -1;
+  lex->out_last = -1;
  #else /* HAVE_ICONV */
    if (strcmp (encoding, DEFAULT_ENCODING))
      enc_error = 1;
@@ -253,81 +255,99 @@ java_read_char (lex)
  
  #ifdef HAVE_ICONV
    {
-    char out[2];
-    size_t ir, inbytesleft, in_save, out_count;
+    size_t ir, inbytesleft, in_save, out_count, out_save;
      char *inp, *outp;
+    unicode_t result;
  
-    while (1)
+    /* If there is data which has already been converted, use it.  */
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
        {
-       /* See if we need to read more data.  If FIRST == 0 then the
-          previous conversion attempt ended in the middle of a
-          character at the end of the buffer.  Otherwise we only have
-          to read if the buffer is empty.  */
-       if (lex->first == 0 || lex->first >= lex->last)
-         {
-           int r;
+       lex->out_first = 0;
+       lex->out_last = 0;
  
-           if (lex->first >= lex->last)
+       while (1)
+         {
+           /* See if we need to read more data.  If FIRST == 0 then
+              the previous conversion attempt ended in the middle of
+              a character at the end of the buffer.  Otherwise we
+              only have to read if the buffer is empty.  */
+           if (lex->first == 0 || lex->first >= lex->last)
               {
-               lex->first = 0;
-               lex->last = 0;
+               int r;
+
+               if (lex->first >= lex->last)
+                 {
+                   lex->first = 0;
+                   lex->last = 0;
+                 }
+               if (feof (lex->finput))
+                 return UEOF;
+               r = fread (&lex->buffer[lex->last], 1,
+                          sizeof (lex->buffer) - lex->last,
+                          lex->finput);
+               lex->last += r;
               }
-           if (feof (lex->finput))
-             return UEOF;
-           r = fread (&lex->buffer[lex->last], 1,
-                      sizeof (lex->buffer) - lex->last,
-                      lex->finput);
-           lex->last += r;
-         }
  
-       inbytesleft = lex->last - lex->first;
-
-       if (inbytesleft == 0)
-         {
-           /* We've tried to read and there is nothing left.  */
-           return UEOF;
-         }
+           inbytesleft = lex->last - lex->first;
+           out_count = sizeof (lex->out_buffer) - lex->out_last;
  
-       in_save = inbytesleft;
-       out_count = 2;
-       inp = &lex->buffer[lex->first];
-       outp = out;
-       ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
-                   &outp, &out_count);
-       lex->first += in_save - inbytesleft;
-
-       if (out_count == 0)
-         {
-           /* Success.  We assume that UCS-2 is big-endian.  This
-              appears to be an ok assumption.  */
-           unicode_t result;
-           result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
-           return result;
-         }
-
-       if (ir == (size_t) -1)
-         {
-           if (errno == EINVAL)
+           if (inbytesleft == 0)
               {
-               /* This is ok.  This means that the end of our buffer
-                  is in the middle of a character sequence.  We just
-                  move the valid part of the buffer to the beginning
-                  to force a read.  */
-               /* We use bcopy() because it should work for
-                  overlapping strings.  Use memmove() instead... */
-               bcopy (&lex->buffer[lex->first], &lex->buffer[0],
-                      lex->last - lex->first);
-               lex->last -= lex->first;
-               lex->first = 0;
+               /* We've tried to read and there is nothing left.  */
+               return UEOF;
               }
-           else
+
+           in_save = inbytesleft;
+           out_save = out_count;
+           inp = &lex->buffer[lex->first];
+           outp = &lex->out_buffer[lex->out_last];
+           ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+                       &outp, &out_count);
+           lex->first += in_save - inbytesleft;
+           lex->out_last += out_save - out_count;
+
+           /* If we converted anything at all, move along.  */
+           if (out_count != out_save)
+             break;
+
+           if (ir == (size_t) -1)
               {
-               /* A more serious error.  */
-               java_lex_error ("unrecognized character in input stream", 0);
-               return UEOF;
+               if (errno == EINVAL)
+                 {
+                   /* This is ok.  This means that the end of our buffer
+                      is in the middle of a character sequence.  We just
+                      move the valid part of the buffer to the beginning
+                      to force a read.  */
+                   /* We use bcopy() because it should work for
+                      overlapping strings.  Use memmove() instead... */
+                   bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+                          lex->last - lex->first);
+                   lex->last -= lex->first;
+                   lex->first = 0;
+                 }
+               else
+                 {
+                   /* A more serious error.  */
+                   java_lex_error ("unrecognized character in input stream",
+                                   0);
+                   return UEOF;
+                 }
               }
           }
        }
+
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+      {
+       /* Don't have any data.  */
+       return UEOF;
+      }
+
+    /* Success.  We assume that UCS-2 is big-endian.  This appears to
+       be an ok assumption.  */
+    result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
+             | (unsigned char) lex->out_buffer[lex->out_first + 1]);
+    lex->out_first += 2;
+    return result;
    }
  #else /* HAVE_ICONV */
    {
diff --git a/gcc/java/lex.h b/gcc/java/lex.h

index 68cb1c0e5eb2bff16ca9fe2e6b376da3c5cd5fed..71a030d2fefd50a04f9fe5794260542d98b039de 100644 (file)
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -128,6 +128,20 @@ typedef struct java_lexer
    /* Index of last valid character in buffer, plus one.  -1 if no
       valid characters in buffer.  */
    int last;
+
+  /* This is a buffer of characters already converted by iconv.  We
+     use `char' here because we're assuming that iconv() converts to
+     big-endian UCS-2, and then we convert it ourselves.  */
+  char out_buffer[1024];
+
+  /* Index of first valid output character.  -1 if no valid
+     characters.  */
+  int out_first;
+
+  /* Index of last valid output character, plus one.  -1 if no valid
+     characters.  */
+  int out_last;
+
  #endif /* HAVE_ICONV */
  } java_lexer;
author	Tom Tromey <tromey@cygnus.com>
	Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
committer	Tom Tromey <tromey@gcc.gnu.org>
	Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
gcc/java/ChangeLog		patch | blob | history
gcc/java/lex.c		patch | blob | history
gcc/java/lex.h		patch | blob | history