lex.c (java_new_lexer): Initialize out_first and out_last fields.
author Tom Tromey <tromey@cygnus.com>
Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
committer Tom Tromey <tromey@gcc.gnu.org>
Tue, 10 Oct 2000 22:02:12 +0000 (22:02 +0000)
* lex.c (java_new_lexer): Initialize out_first and out_last
fields.
* lex.h (java_lexer): Added out_buffer, out_first, out_last.

From-SVN: r36830

gcc/java/ChangeLog
gcc/java/lex.c
gcc/java/lex.h

index b63d7511bf0c4ab012110c19b9903bcc322e072a..99fa85432429dd7ad84d476cdf5875c739ed05f2 100644 (file)
@@ -1,3 +1,9 @@
+2000-10-10  Tom Tromey  <tromey@cygnus.com>
+
+       * lex.c (java_new_lexer): Initialize out_first and out_last
+       fields.
+       * lex.h (java_lexer): Added out_buffer, out_first, out_last.
+
 2000-10-07  Alexandre Petit-Bianco  <apbianco@cygnus.com>
 
        Patch contributed by Corey Minyard.
index be1b1392baecabdff7075d4dfbd1e918fa6499db..8fb6a15a6fd3f877271bddae6a827e8b88739188 100644 (file)
@@ -219,6 +219,8 @@ java_new_lexer (finput, encoding)
     }
   lex->first = -1;
   lex->last = -1;
+  lex->out_first = -1;
+  lex->out_last = -1;
 #else /* HAVE_ICONV */
   if (strcmp (encoding, DEFAULT_ENCODING))
     enc_error = 1;
@@ -253,81 +255,99 @@ java_read_char (lex)
 
 #ifdef HAVE_ICONV
   {
-    char out[2];
-    size_t ir, inbytesleft, in_save, out_count;
+    size_t ir, inbytesleft, in_save, out_count, out_save;
     char *inp, *outp;
+    unicode_t result;
 
-    while (1)
+    /* If there is data which has already been converted, use it.  */
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
       {
-       /* See if we need to read more data.  If FIRST == 0 then the
-          previous conversion attempt ended in the middle of a
-          character at the end of the buffer.  Otherwise we only have
-          to read if the buffer is empty.  */
-       if (lex->first == 0 || lex->first >= lex->last)
-         {
-           int r;
+       lex->out_first = 0;
+       lex->out_last = 0;
 
-           if (lex->first >= lex->last)
+       while (1)
+         {
+           /* See if we need to read more data.  If FIRST == 0 then
+              the previous conversion attempt ended in the middle of
+              a character at the end of the buffer.  Otherwise we
+              only have to read if the buffer is empty.  */
+           if (lex->first == 0 || lex->first >= lex->last)
              {
-               lex->first = 0;
-               lex->last = 0;
+               int r;
+
+               if (lex->first >= lex->last)
+                 {
+                   lex->first = 0;
+                   lex->last = 0;
+                 }
+               if (feof (lex->finput))
+                 return UEOF;
+               r = fread (&lex->buffer[lex->last], 1,
+                          sizeof (lex->buffer) - lex->last,
+                          lex->finput);
+               lex->last += r;
              }
-           if (feof (lex->finput))
-             return UEOF;
-           r = fread (&lex->buffer[lex->last], 1,
-                      sizeof (lex->buffer) - lex->last,
-                      lex->finput);
-           lex->last += r;
-         }
 
-       inbytesleft = lex->last - lex->first;
-
-       if (inbytesleft == 0)
-         {
-           /* We've tried to read and there is nothing left.  */
-           return UEOF;
-         }
+           inbytesleft = lex->last - lex->first;
+           out_count = sizeof (lex->out_buffer) - lex->out_last;
 
-       in_save = inbytesleft;
-       out_count = 2;
-       inp = &lex->buffer[lex->first];
-       outp = out;
-       ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
-                   &outp, &out_count);
-       lex->first += in_save - inbytesleft;
-
-       if (out_count == 0)
-         {
-           /* Success.  We assume that UCS-2 is big-endian.  This
-              appears to be an ok assumption.  */
-           unicode_t result;
-           result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
-           return result;
-         }
-
-       if (ir == (size_t) -1)
-         {
-           if (errno == EINVAL)
+           if (inbytesleft == 0)
              {
-               /* This is ok.  This means that the end of our buffer
-                  is in the middle of a character sequence.  We just
-                  move the valid part of the buffer to the beginning
-                  to force a read.  */
-               /* We use bcopy() because it should work for
-                  overlapping strings.  Use memmove() instead... */
-               bcopy (&lex->buffer[lex->first], &lex->buffer[0],
-                      lex->last - lex->first);
-               lex->last -= lex->first;
-               lex->first = 0;
+               /* We've tried to read and there is nothing left.  */
+               return UEOF;
              }
-           else
+
+           in_save = inbytesleft;
+           out_save = out_count;
+           inp = &lex->buffer[lex->first];
+           outp = &lex->out_buffer[lex->out_last];
+           ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+                       &outp, &out_count);
+           lex->first += in_save - inbytesleft;
+           lex->out_last += out_save - out_count;
+
+           /* If we converted anything at all, move along.  */
+           if (out_count != out_save)
+             break;
+
+           if (ir == (size_t) -1)
              {
-               /* A more serious error.  */
-               java_lex_error ("unrecognized character in input stream", 0);
-               return UEOF;
+               if (errno == EINVAL)
+                 {
+                   /* This is ok.  This means that the end of our buffer
+                      is in the middle of a character sequence.  We just
+                      move the valid part of the buffer to the beginning
+                      to force a read.  */
+                   /* We use bcopy() because it should work for
+                      overlapping strings.  Use memmove() instead... */
+                   bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+                          lex->last - lex->first);
+                   lex->last -= lex->first;
+                   lex->first = 0;
+                 }
+               else
+                 {
+                   /* A more serious error.  */
+                   java_lex_error ("unrecognized character in input stream",
+                                   0);
+                   return UEOF;
+                 }
              }
          }
       }
+
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+      {
+       /* Don't have any data.  */
+       return UEOF;
+      }
+
+    /* Success.  We assume that UCS-2 is big-endian.  This appears to
+       be an ok assumption.  */
+    result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
+             | (unsigned char) lex->out_buffer[lex->out_first + 1]);
+    lex->out_first += 2;
+    return result;
   }
 #else /* HAVE_ICONV */
   {
index 68cb1c0e5eb2bff16ca9fe2e6b376da3c5cd5fed..71a030d2fefd50a04f9fe5794260542d98b039de 100644 (file)
@@ -128,6 +128,20 @@ typedef struct java_lexer
   /* Index of last valid character in buffer, plus one.  -1 if no
      valid characters in buffer.  */
   int last;
+
+  /* This is a buffer of characters already converted by iconv.  We
+     use `char' here because we're assuming that iconv() converts to
+     big-endian UCS-2, and then we convert it ourselves.  */
+  char out_buffer[1024];
+
+  /* Index of first valid output character.  -1 if no valid
+     characters.  */
+  int out_first;
+
+  /* Index of last valid output character, plus one.  -1 if no valid
+     characters.  */
+  int out_last;
+
 #endif /* HAVE_ICONV */
 } java_lexer;