Index: src/java/org/apache/regexp/RE.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
retrieving revision 1.21
diff -u -r1.21 RE.java
--- src/java/org/apache/regexp/RE.java	27 Feb 2004 02:41:20 -0000	1.21
+++ src/java/org/apache/regexp/RE.java	29 Feb 2004 06:16:10 -0000
@@ -1591,6 +1591,12 @@
     public static final int REPLACE_BACKREFERENCES = 0x0002;
 
     /**
+     * Flag bit that indicates that subst should treat \ in the substitution
+     * string as an escape symbol.
+     */
+    public static final int REPLACE_WITH_ESCAPES = 0x0004;
+
+    /**
      * Substitutes a string for this regular expression in another string.
      * This method works like the Perl function of the same name.
      * Given a regular expression of "a*b", a String to substituteIn of
@@ -1645,6 +1651,7 @@
         // Start at position 0 and search the whole string
         int pos = 0;
         int len = substituteIn.length();
+        Vector substParts = parseSubstString(substitution, flags);
 
         // Try a match at each position
         while (pos < len && match(substituteIn, pos))
@@ -1652,54 +1659,8 @@
             // Append string before match
             ret.append(substituteIn.substring(pos, getParenStart(0)));
 
-            if ((flags & REPLACE_BACKREFERENCES) != 0)
-            {
-                // Process backreferences
-                int lCurrentPosition = 0;
-                int lLastPosition = -2;
-                int lLength = substitution.length();
-                boolean bAddedPrefix = false;
-
-                while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
-                {
-                    if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
-                        && lCurrentPosition+1 < lLength)
-                    {
-                        char c = substitution.charAt(lCurrentPosition + 1);
-                        if (c >= '0' && c <= '9')
-                        {
-                            if (bAddedPrefix == false)
-                            {
-                                // Append everything between the beginning of the
-                                // substitution string and the current $ sign
-                                ret.append(substitution.substring(0, lCurrentPosition));
-                                bAddedPrefix = true;
-                            }
-                            else
-                            {
-                                // Append everything between the last and the current $ sign
-                                ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
-                            }
-
-                            // Append the parenthesized expression
-                            // Note: if a parenthesized expression of the requested
-                            // index is not available "null" is added to the string
-                            ret.append(getParen(c - '0'));
-                            lLastPosition = lCurrentPosition;
-                        }
-                    }
-
-                    // Move forward, skipping past match
-                    lCurrentPosition++;
-                }
-
-                // Append everything after the last $ sign
-                ret.append(substitution.substring(lLastPosition + 2, lLength));
-            }
-            else
-            {
-                // Append substitution without processing backreferences
-                ret.append(substitution);
+            for (int i = 0; i < substParts.size(); i++) {
+                ret.append(substParts.get(i).toString());
             }
 
             // Move forward, skipping past match
@@ -1774,5 +1735,77 @@
         }
 
         return false;
+    }
+
+    /**
+     * Parses the given substitution string and returns a vector of its parts
+     * (plain strings and BackRefSubstParts) which will be used in subst().
+     * A <code>$</code> starts a backreference only when REPLACE_BACKREFERENCES
+     * is set and it is immediately followed by a digit; otherwise it is
+     * copied literally.
+     * @param substitution string to parse
+     * @param flags subst() flags
+     * @return vector of substitution string parts
+     */
+    private Vector parseSubstString(String substitution, int flags) {
+        final Vector parts = new Vector();
+        final boolean useBackrefs = ((flags & REPLACE_BACKREFERENCES) != 0);
+        final boolean useEscapes = ((flags & REPLACE_WITH_ESCAPES) != 0);
+        final int len = substitution.length();
+        StringBuffer part = new StringBuffer();
+        for (int i = 0; i < len; i++) {
+            final char curChar = substitution.charAt(i);
+            if (useEscapes && curChar == '\\' && (i + 1) < len)
+            {
+                // if we have escape char just add next character
+                part.append(substitution.charAt(++i));
+            }
+            else if (curChar == '\\' && (i + 1) < len
+                     && substitution.charAt(i + 1) == '$')
+            {
+                // we do not use escaped characters, but next is $
+                // add both \ and $
+                part.append(curChar);
+                part.append(substitution.charAt(++i));
+            }
+            else if (!useBackrefs || curChar != '$' || (i + 1) == len
+                     || substitution.charAt(i + 1) < '0'
+                     || substitution.charAt(i + 1) > '9')
+            {
+                // plain char; a $ that is last or not followed by a digit
+                // is not a backreference and is copied as is
+                part.append(curChar);
+            }
+            else
+            {
+                // we have backref
+                // add previous substitution part
+                parts.add(part.toString());
+                // add backref subst part
+                char paren = substitution.charAt(++i);
+                parts.add(new BackRefSubstPart(paren - '0'));
+                // clear current part
+                part = new StringBuffer();
+            }
+        }
+        parts.add(part.toString());
+
+        return parts;
+    }
+
+    /**
+     * Represents a backreference in the substitution string.
+     * To get the current string for this backref just call toString().
+     */
+    private class BackRefSubstPart {
+        final int paren;
+
+        BackRefSubstPart(int paren) {
+            this.paren = paren;
+        }
+
+        public String toString() {
+            return RE.this.getParen(paren);
+        }
     }
 }
Index: src/java/org/apache/regexp/RETest.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.13
diff -u -r1.13 RETest.java
--- src/java/org/apache/regexp/RETest.java	27 Feb 2004 02:41:20 -0000	1.13
+++ src/java/org/apache/regexp/RETest.java	29 Feb 2004 06:16:15 -0000
@@ -501,6 +501,33 @@
 
         actual = r.subst("\r\na\r\n", "b", RE.REPLACE_BACKREFERENCES);
         assertEquals("Wrong subst() result", "\r\nb\r\n", actual);
+
+        r = new RE("\\.");
+        actual = r.subst(".", "\\$0", RE.REPLACE_BACKREFERENCES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\$0", actual);
+
+        // Test subst with backrefs and escaped \
+        r = new RE("[.?]");
+        actual = r.subst(".", "\\\\$0",
+            RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\.", actual);
+
+        actual = r.subst(".", "\\\\\\$0",
+            RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\$0", actual);
+
+        // Test subst() without backref but with $ sign
+        r = new RE("[.?]");
+        actual = r.subst(".", "$0");
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "$0", actual);
+
+        // Test subst() with backrefs enabled and $ not followed by a digit
+        actual = r.subst(".", "$a", RE.REPLACE_BACKREFERENCES);
+        assertEquals("Wrong subst() result", "$a", actual);
     }
 
     public void assertEquals(String message, String expected, String actual)