re PR libgcj/20435 (regex pattern compiling bug)
author Ziga Mahkovec <ziga.mahkovec@klika.si>
Wed, 1 Jun 2005 22:11:00 +0000 (00:11 +0200)
committer Ziga Mahkovec <ziga@gcc.gnu.org>
Wed, 1 Jun 2005 22:11:00 +0000 (22:11 +0000)
2005-06-01  Ziga Mahkovec  <ziga.mahkovec@klika.si>

PR libgcj/20435:
* gnu/regexp/RESyntax.java (RE_POSSESSIVE_OPS): New field.
(static): Add possessive matching to JAVA_1_4 syntax.
* gnu/regexp/RETokenRepeated.java (possessive): New field.
(makePossessive, isPossessive): New methods.
(match): Don't back off during possessive matching.
* gnu/regexp/RE.java (initialize): Accept possessive quantifier.
* java/util/regex/Pattern.java (constructor): Switch syntax from PERL5
to JAVA_1_4.

From-SVN: r100466

libjava/ChangeLog
libjava/gnu/regexp/RE.java
libjava/gnu/regexp/RESyntax.java
libjava/gnu/regexp/RETokenRepeated.java
libjava/java/util/regex/Pattern.java

index 46c292e2c6e3b9b3c30f09291d3325b431094f79..7b3a363b10968968b88ff899a6230e802db21b61 100644 (file)
@@ -1,3 +1,15 @@
+2005-06-01  Ziga Mahkovec  <ziga.mahkovec@klika.si>
+
+       PR libgcj/20435:
+       * gnu/regexp/RESyntax.java (RE_POSSESSIVE_OPS): New field.
+       (static): Add possessive matching to JAVA_1_4 syntax.
+       * gnu/regexp/RETokenRepeated.java (possessive): New field.
+       (makePossessive, isPossessive): New methods.
+       (match): Don't back off during possessive matching.
+       * gnu/regexp/RE.java (initalize): Accept possessive quantifier.
+       * java/util/regex/Pattern.java (constructor): Switch syntax from PERL5
+       to JAVA_1_4.
+
 2005-06-01  Keith Seitz  <keiths@redhat.com>
 
        * gnu/classpath/jdwp/transport/JdwpPacket.java: New file.
index c8c8a3eb9ff0dc700e226648ec6a9bd783ea9b03..541e8cb950fd40b478d20462800ae4c7cab57262 100644 (file)
@@ -629,20 +629,29 @@ public class RE extends REToken {
        currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
       }
 
-      // ONE-OR-MORE REPEAT OPERATOR
+      // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR
       //  + | \+ depending on RE_BK_PLUS_QM
       //  not available if RE_LIMITED_OPS is set
 
       else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
        if (currentToken == null)
           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
-       if (currentToken instanceof RETokenRepeated)
-          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
-       if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+       
+       // Check for possessive matching on RETokenRepeated
+       if (currentToken instanceof RETokenRepeated) {
+         RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
+         if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
+           tokenRep.makePossessive();
+         else
+           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+
+       }
+       else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
-       if (currentToken.getMinimumLength() == 0)
+       else if (currentToken.getMinimumLength() == 0)
          throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
-       currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
+       else
+         currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
       }
 
       // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
@@ -655,13 +664,14 @@ public class RE extends REToken {
 
        // Check for stingy matching on RETokenRepeated
        if (currentToken instanceof RETokenRepeated) {
-          if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
-            ((RETokenRepeated)currentToken).makeStingy();
-          else
-            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
-        }
-        else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
-          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
+         RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
+         if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
+           tokenRep.makeStingy();
+         else
+           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+       }
+       else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+         throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
        else
          currentToken = setRepeated(currentToken,0,1,index);
       }
index 649bd0df584fa013f07015b5bf281d588f7a48a9..7cb3e1400b809019c023b0409cd5fa18bd5b442b 100644 (file)
@@ -197,7 +197,12 @@ public final class RESyntax implements Serializable {
    */
   public static final int RE_CHAR_CLASS_ESC_IN_LISTS   = 24;
 
-  private static final int BIT_TOTAL                   = 25;
+  /**
+   * Syntax bit.  Possessive matching is allowed (++, *+, ?+, {x,y}+).
+   */
+  public static final int RE_POSSESSIVE_OPS            = 25;
+
+  private static final int BIT_TOTAL                   = 26;
 
   /**
    * Predefined syntax.
@@ -425,6 +430,7 @@ public final class RESyntax implements Serializable {
 
       RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
          // XXX
+         .set(RE_POSSESSIVE_OPS)         // *+,?+,++,{}+
          .makeFinal();
   }
 
index 8c7892712205938674941ad1a354d26272e23c3d..821e4c55c0f1e6218f162ffc51b26a0b5031bf34 100644 (file)
@@ -44,6 +44,7 @@ final class RETokenRepeated extends REToken {
     private REToken token;
     private int min,max;
     private boolean stingy;
+    private boolean possessive;
     
     RETokenRepeated(int subIndex, REToken token, int min, int max) {
        super(subIndex);
@@ -61,6 +62,16 @@ final class RETokenRepeated extends REToken {
     boolean isStingy() {
        return stingy;
     }
+
+    /** Sets possessive matching mode to true. */
+    void makePossessive() {
+        possessive = true;
+    }
+
+    /** Queries if this token has possessive matching enabled. */
+    boolean isPossessive() {
+        return possessive;
+    }
     
     /**
      * The minimum length of a repeated token is the minimum length
@@ -172,6 +183,8 @@ final class RETokenRepeated extends REToken {
                }
            }
            // else did not match rest of the tokens, try again on smaller sample
+           // or break out when performing possessive matching
+           if (possessive) break;
        }
        if (allResults != null) {
            mymatch.assignFrom(allResults); // does this get all?
index 06418a22a3430f214bfd33e99af015027e793fd9..455171c5bbb2cfd33b6063389887df9b076aef32 100644 (file)
@@ -84,8 +84,7 @@ public final class Pattern implements Serializable
     // if ((flags & UNICODE_CASE) != 0) gnuFlags =
     // if ((flags & CANON_EQ) != 0) gnuFlags =
 
-    // Eventually there will be such a thing as JDK 1_4 syntax
-    RESyntax syntax = RESyntax.RE_SYNTAX_PERL5;
+    RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
     if ((flags & UNIX_LINES) != 0)
       {
        // Use a syntax set with \n for linefeeds?