classpath-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[cp-patches] RFC: gnu.regexp: support embedded flags


From: Ito Kazumitsu
Subject: [cp-patches] RFC: gnu.regexp: support embedded flags
Date: Sat, 14 Jan 2006 10:04:50 +0900 (JST)

This patch fixes bug #22884 by adding support for embedded flags in gnu.regexp.

ChangeLog:
2006-01-14  Ito Kazumitsu  <address@hidden>

        Fixes bug #22884
        * gnu/regexp/RE.java (initialize): Parse embedded flags.
        * gnu/regexp/RESyntax.java (RE_EMBEDDED_FLAGS): New syntax bit.

Index: classpath/gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.9
diff -u -r1.9 RE.java
--- classpath/gnu/regexp/RE.java        12 Jan 2006 22:13:49 -0000      1.9
+++ classpath/gnu/regexp/RE.java        14 Jan 2006 00:55:45 -0000
@@ -333,6 +333,11 @@
     char ch;
     boolean quot = false;
 
+    // Saved syntax and flags.
+    RESyntax savedSyntax = null;
+    int savedCflags = 0;
+    boolean flagsSaved = false;
+
     while (index < pLength) {
       // read the next character unit (including backslash escapes)
       index = getCharUnit(pattern,index,unit,quot);
@@ -525,6 +530,86 @@
               index += 2;
             }
             break;
+         case 'i':
+         case 'd':
+         case 'm':
+         case 's':
+         // case 'u':  not supported
+         // case 'x':  not supported
+         case '-':
+            if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
+           // Set or reset syntax flags.
+           int flagIndex = index + 1;
+           int endFlag = -1;
+           RESyntax newSyntax = new RESyntax(syntax);
+           int newCflags = cflags;
+           boolean negate = false;
+           while (flagIndex < pLength && endFlag < 0) {
+               switch(pattern[flagIndex]) {
+               case 'i':
+                 if (negate)
+                   newCflags &= ~REG_ICASE;
+                 else
+                   newCflags |= REG_ICASE;
+                 flagIndex++;
+                 break;
+               case 'd':
+                 if (negate)
+                   newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);
+                 else
+                   newSyntax.setLineSeparator("\n");
+                 flagIndex++;
+                 break;
+               case 'm':
+                 if (negate)
+                   newCflags &= ~REG_MULTILINE;
+                 else
+                   newCflags |= REG_MULTILINE;
+                 flagIndex++;
+                 break;
+               case 's':
+                 if (negate)
+                   newCflags &= ~REG_DOT_NEWLINE;
+                 else
+                   newCflags |= REG_DOT_NEWLINE;
+                 flagIndex++;
+                 break;
+               // case 'u': not supported
+               // case 'x': not supported
+               case '-':
+                 negate = true;
+                 flagIndex++;
+                 break;
+               case ':':
+               case ')':
+                 endFlag = pattern[flagIndex];
+                 break;
+               default:
+                 throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
+               }
+           }
+           if (endFlag == ')') {
+               syntax = newSyntax;
+               cflags = newCflags;
+               insens = ((cflags & REG_ICASE) > 0);
+               // This can be treated as though it were a comment.
+               comment = true;
+               index = flagIndex - 1;
+               break;
+           }
+           if (endFlag == ':') {
+               savedSyntax = syntax;
+               savedCflags = cflags;
+               flagsSaved = true;
+               syntax = newSyntax;
+               cflags = newCflags;
+               insens = ((cflags & REG_ICASE) > 0);
+               index = flagIndex -1;
+               // Fall through to the next case.
+           }
+           else {
+               throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
+           }
          case ':':
            if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
              pure = true;
@@ -616,6 +701,12 @@
          }
 
          index = nextIndex;
+         if (flagsSaved) {
+             syntax = savedSyntax;
+             cflags = savedCflags;
+             insens = ((cflags & REG_ICASE) > 0);
+             flagsSaved = false;
+         }
        } // not a comment
       } // subexpression
     
Index: classpath/gnu/regexp/RESyntax.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RESyntax.java,v
retrieving revision 1.3
diff -u -r1.3 RESyntax.java
--- classpath/gnu/regexp/RESyntax.java  2 Jul 2005 20:32:15 -0000       1.3
+++ classpath/gnu/regexp/RESyntax.java  14 Jan 2006 00:55:45 -0000
@@ -202,7 +202,12 @@
    */
   public static final int RE_POSSESSIVE_OPS            = 25;
 
-  private static final int BIT_TOTAL                   = 26;
+  /**
+   * Syntax bit.  Allow embedded flags, (?is-x), as in Perl5.
+   */
+  public static final int RE_EMBEDDED_FLAGS            = 26;
+
+  private static final int BIT_TOTAL                   = 27;
 
   /**
    * Predefined syntax.
@@ -422,6 +427,7 @@
          .set(RE_STRING_ANCHORS)         // \A,\Z
          .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
          .set(RE_COMMENTS)              // (?#)
+         .set(RE_EMBEDDED_FLAGS)         // (?imsx-imsx)
          .makeFinal();
       
       RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]