View | Details | Raw Unified | Return to bug 19329
Collapse All | Expand All

(-)docs/RETest.txt (-1 / +213 lines)
Lines 1066-1069 Link Here
1066
[a-z]{0,3}
1066
[a-z]{0,3}
1067
123abcdefg123
1067
123abcdefg123
1068
YES
1068
YES
1069
abc
1069
1070
#175
1071
ab{0,1}a
1072
aa
1073
YES
1074
aa
1075
1076
#176
1077
ab{0,1}a
1078
aba
1079
YES
1080
aba
1081
1082
#177
1083
ab{0,1}a
1084
abba
1085
NO
1086
1087
#178
1088
ab{0,2}a
1089
aa
1090
YES
1091
aa
1092
1093
#179
1094
ab{0,2}a
1095
aba
1096
YES
1097
aba
1098
1099
#180
1100
ab{0,2}a
1101
abba
1102
YES
1103
abba
1104
1105
#181
1106
ab{0,2}a
1107
abbba
1108
NO
1109
1110
#182
1111
ab{1,1}a
1112
aa
1113
NO
1114
1115
#183
1116
ab{1,1}a
1117
aba
1118
YES
1119
aba
1120
1121
#184
1122
ab{1,1}a
1123
abba
1124
NO
1125
1126
#185
1127
ab{1,2}a
1128
aa
1129
NO
1130
1131
#186
1132
ab{1,2}a
1133
aba
1134
YES
1135
aba
1136
1137
#187
1138
ab{1,2}a
1139
abba
1140
YES
1141
abba
1142
1143
#188
1144
ab{1,2}a
1145
abbba
1146
NO
1147
1148
#189
1149
ab{0,}a
1150
aa
1151
YES
1152
aa
1153
1154
#190
1155
ab{0,}a
1156
aba
1157
YES
1158
aba
1159
1160
#191
1161
ab{0,}a
1162
abba
1163
YES
1164
abba
1165
1166
#192
1167
ab{1,}a
1168
aa
1169
NO
1170
1171
#193
1172
ab{1,}a
1173
aba
1174
YES
1175
aba
1176
1177
#194
1178
ab{1,}a
1179
abba
1180
YES
1181
abba
1182
1183
#195
1184
ab{1}a
1185
aa
1186
NO
1187
1188
#196
1189
ab{1}a
1190
aba
1191
YES
1192
aba
1193
1194
#197
1195
ab{1}a
1196
abba
1197
NO
1198
1199
#198
1200
ab{0}a
1201
aa
1202
YES
1203
aa
1204
1205
#199
1206
ab{0}a
1207
aba
1208
NO
1209
1210
#200
1211
ab{2}a
1212
aa
1213
NO
1214
1215
#201
1216
ab{2}a
1217
aba
1218
NO
1219
1220
#202
1221
ab{2}a
1222
abba
1223
YES
1224
abba
1225
1226
#203
1227
ab{2}a
1228
abbba
1229
NO
1230
1231
#204
1232
[ \-]
1233
 -
1234
YES
1235
 -
1236
1237
#205
1238
[a-z0-9\.\-]+
1239
{regexp-1.2}
1240
YES
1241
regexp-1.2
1242
1243
#206
1244
[a-z0-9\-\.]+
1245
{regexp-1.2}
1246
YES
1247
regexp-1.2
1248
1249
#207
1250
[a-z\-0-9\.]+
1251
{regexp-1.2}
1252
YES
1253
regexp-1.2
1254
1255
#208
1256
\w+
1257
a_b
1258
YES
1259
a_b
1260
1261
#209
1262
([0123])??((((1st)|(2nd))|(3rd))|(\dth))
1263
1st
1264
YES
1265
1st
1266
1267
1st
1268
1st
1269
1st
1270
1st
1271
1272
#210
1273
[^\s\]'<>(),;:\.\[]
1274
-
1275
YES
1276
-
1277
1278
#211
1279
^\(?(\d{3})\)?[\- ]?(\d{3})[\- ]?(\d{4})$
1280
(425) 576+1202
1281
NO
(-)src/java/org/apache/regexp/RE.java (-17 / +19 lines)
Lines 57-62 Link Here
57
 *
57
 *
58
 */ 
58
 */ 
59
 
59
 
60
import java.io.Serializable;
60
import java.util.Vector;
61
import java.util.Vector;
61
62
62
/**
63
/**
Lines 365-371 Link Here
365
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
366
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
366
 * @version $Id: RE.java,v 1.11 2002/12/13 18:11:57 jon Exp $
367
 * @version $Id: RE.java,v 1.11 2002/12/13 18:11:57 jon Exp $
367
 */
368
 */
368
public class RE
369
public class RE implements Serializable
369
{
370
{
370
    /**
371
    /**
371
     * Specifies normal, case-sensitive matching behaviour.
372
     * Specifies normal, case-sensitive matching behaviour.
Lines 465-489 Link Here
465
466
466
    // State of current program
467
    // State of current program
467
    REProgram program;                            // Compiled regular expression 'program'
468
    REProgram program;                            // Compiled regular expression 'program'
468
    CharacterIterator search;                                // The string being matched against
469
    transient CharacterIterator search;           // The string being matched against
469
    int idx;                                      // Current index in string being searched
470
    int matchFlags;                               // Match behaviour flags
470
    int matchFlags;                               // Match behaviour flags
471
    int maxParen = MAX_PAREN;
471
    int maxParen = MAX_PAREN;
472
472
473
    // Parenthesized subexpressions
473
    // Parenthesized subexpressions
474
    int parenCount;                               // Number of subexpressions matched (num open parens + 1)
474
    transient int parenCount;                     // Number of subexpressions matched (num open parens + 1)
475
    int start0;                                   // Cache of start[0]
475
    transient int start0;                         // Cache of start[0]
476
    int end0;                                     // Cache of start[0]
476
    transient int end0;                           // Cache of end[0]
477
    int start1;                                   // Cache of start[1]
477
    transient int start1;                         // Cache of start[1]
478
    int end1;                                     // Cache of start[1]
478
    transient int end1;                           // Cache of end[1]
479
    int start2;                                   // Cache of start[2]
479
    transient int start2;                         // Cache of start[2]
480
    int end2;                                     // Cache of start[2]
480
    transient int end2;                           // Cache of end[2]
481
    int[] startn;                                 // Lazy-alloced array of sub-expression starts
481
    transient int[] startn;                       // Lazy-alloced array of sub-expression starts
482
    int[] endn;                                   // Lazy-alloced array of sub-expression ends
482
    transient int[] endn;                         // Lazy-alloced array of sub-expression ends
483
483
484
    // Backreferences
484
    // Backreferences
485
    int[] startBackref;                           // Lazy-alloced array of backref starts
485
    transient int[] startBackref;                 // Lazy-alloced array of backref starts
486
    int[] endBackref;                             // Lazy-alloced array of backref ends
486
    transient int[] endBackref;                   // Lazy-alloced array of backref ends
487
487
488
    /**
488
    /**
489
     * Constructs a regular expression matcher from a String by compiling it
489
     * Constructs a regular expression matcher from a String by compiling it
Lines 1098-1109 Link Here
1098
                                return -1;
1098
                                return -1;
1099
                            }
1099
                            }
1100
1100
1101
                            char c = search.charAt(idx);
1102
1101
                            // Switch on escape
1103
                            // Switch on escape
1102
                            switch (opdata)
1104
                            switch (opdata)
1103
                            {
1105
                            {
1104
                                case E_ALNUM:
1106
                                case E_ALNUM:
1105
                                case E_NALNUM:
1107
                                case E_NALNUM:
1106
                                    if (!(Character.isLetterOrDigit(search.charAt(idx)) == (opdata == E_ALNUM)))
1108
                                    if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
1107
                                    {
1109
                                    {
1108
                                        return -1;
1110
                                        return -1;
1109
                                    }
1111
                                    }
Lines 1111-1117 Link Here
1111
1113
1112
                                case E_DIGIT:
1114
                                case E_DIGIT:
1113
                                case E_NDIGIT:
1115
                                case E_NDIGIT:
1114
                                    if (!(Character.isDigit(search.charAt(idx)) == (opdata == E_DIGIT)))
1116
                                    if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
1115
                                    {
1117
                                    {
1116
                                        return -1;
1118
                                        return -1;
1117
                                    }
1119
                                    }
Lines 1119-1125 Link Here
1119
1121
1120
                                case E_SPACE:
1122
                                case E_SPACE:
1121
                                case E_NSPACE:
1123
                                case E_NSPACE:
1122
                                    if (!(Character.isWhitespace(search.charAt(idx)) == (opdata == E_SPACE)))
1124
                                    if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
1123
                                    {
1125
                                    {
1124
                                        return -1;
1126
                                        return -1;
1125
                                    }
1127
                                    }
(-)src/java/org/apache/regexp/RECompiler.java (-61 / +65 lines)
Lines 2-11 Link Here
2
2
3
/*
3
/*
4
 * ====================================================================
4
 * ====================================================================
5
 * 
5
 *
6
 * The Apache Software License, Version 1.1
6
 * The Apache Software License, Version 1.1
7
 *
7
 *
8
 * Copyright (c) 1999 The Apache Software Foundation.  All rights 
8
 * Copyright (c) 1999 The Apache Software Foundation.  All rights
9
 * reserved.
9
 * reserved.
10
 *
10
 *
11
 * Redistribution and use in source and binary forms, with or without
11
 * Redistribution and use in source and binary forms, with or without
Lines 13-19 Link Here
13
 * are met:
13
 * are met:
14
 *
14
 *
15
 * 1. Redistributions of source code must retain the above copyright
15
 * 1. Redistributions of source code must retain the above copyright
16
 *    notice, this list of conditions and the following disclaimer. 
16
 *    notice, this list of conditions and the following disclaimer.
17
 *
17
 *
18
 * 2. Redistributions in binary form must reproduce the above copyright
18
 * 2. Redistributions in binary form must reproduce the above copyright
19
 *    notice, this list of conditions and the following disclaimer in
19
 *    notice, this list of conditions and the following disclaimer in
Lines 21-35 Link Here
21
 *    distribution.
21
 *    distribution.
22
 *
22
 *
23
 * 3. The end-user documentation included with the redistribution, if
23
 * 3. The end-user documentation included with the redistribution, if
24
 *    any, must include the following acknowlegement:  
24
 *    any, must include the following acknowlegement:
25
 *       "This product includes software developed by the 
25
 *       "This product includes software developed by the
26
 *        Apache Software Foundation (http://www.apache.org/)."
26
 *        Apache Software Foundation (http://www.apache.org/)."
27
 *    Alternately, this acknowlegement may appear in the software itself,
27
 *    Alternately, this acknowlegement may appear in the software itself,
28
 *    if and wherever such third-party acknowlegements normally appear.
28
 *    if and wherever such third-party acknowlegements normally appear.
29
 *
29
 *
30
 * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
30
 * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
31
 *    Foundation" must not be used to endorse or promote products derived
31
 *    Foundation" must not be used to endorse or promote products derived
32
 *    from this software without prior written permission. For written 
32
 *    from this software without prior written permission. For written
33
 *    permission, please contact apache@apache.org.
33
 *    permission, please contact apache@apache.org.
34
 *
34
 *
35
 * 5. Products derived from this software may not be called "Apache"
35
 * 5. Products derived from this software may not be called "Apache"
Lines 55-61 Link Here
55
 * information on the Apache Software Foundation, please see
55
 * information on the Apache Software Foundation, please see
56
 * <http://www.apache.org/>.
56
 * <http://www.apache.org/>.
57
 *
57
 *
58
 */ 
58
 */
59
59
60
import org.apache.regexp.RE;
60
import org.apache.regexp.RE;
61
import java.util.Hashtable;
61
import java.util.Hashtable;
Lines 100-106 Link Here
100
    // {m,n} stacks
100
    // {m,n} stacks
101
    static final int maxBrackets = 10;                  // Maximum number of bracket pairs
101
    static final int maxBrackets = 10;                  // Maximum number of bracket pairs
102
    static final int bracketUnbounded = -1;             // Unbounded value
102
    static final int bracketUnbounded = -1;             // Unbounded value
103
    static final int bracketFinished = -2;              // Unbounded value
104
    int brackets = 0;                                   // Number of bracket sets
103
    int brackets = 0;                                   // Number of bracket sets
105
    int[] bracketStart = null;                          // Starting point
104
    int[] bracketStart = null;                          // Starting point
106
    int[] bracketEnd = null;                            // Ending point
105
    int[] bracketEnd = null;                            // Ending point
Lines 373-389 Link Here
373
        try
372
        try
374
        {
373
        {
375
            bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
374
            bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
376
            if (bracketMin[brackets] < 1)
377
            {
378
                bracketOpt[brackets]--;
379
            }
380
        }
375
        }
381
        catch (NumberFormatException e)
376
        catch (NumberFormatException e)
382
        {
377
        {
383
            syntaxError("Expected valid number");
378
            syntaxError("Expected valid number");
384
        }
379
        }
385
380
386
        // Optional repetitions must be > 0
381
        // Optional repetitions must be >= 0
387
        if (bracketOpt[brackets] < 0)
382
        if (bracketOpt[brackets] < 0)
388
        {
383
        {
389
            syntaxError("Bad range");
384
            syntaxError("Bad range");
Lines 461-467 Link Here
461
                            c = Character.toLowerCase(c);
456
                            c = Character.toLowerCase(c);
462
                            if (c >= 'a' && c <= 'f')
457
                            if (c >= 'a' && c <= 'f')
463
                            {
458
                            {
464
                                // Compute new value 
459
                                // Compute new value
465
                                val = (val << 4) + (c - 'a') + 10;
460
                                val = (val << 4) + (c - 'a') + 10;
466
                            }
461
                            }
467
                            else
462
                            else
Lines 555-561 Link Here
555
            {
550
            {
556
                idx++;
551
                idx++;
557
            }
552
            }
558
            
553
559
            // Should be a ":]" to terminate the POSIX character class
554
            // Should be a ":]" to terminate the POSIX character class
560
            if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
555
            if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
561
            {
556
            {
Lines 714-720 Link Here
714
            else
709
            else
715
            {
710
            {
716
                // If simple character and not start of range, include it
711
                // If simple character and not start of range, include it
717
                if ((idx + 1) >= len || pattern.charAt(idx + 1) != '-')
712
                if (idx >= len || pattern.charAt(idx) != '-')
718
                {
713
                {
719
                    range.include(simpleChar, include);
714
                    range.include(simpleChar, include);
720
                }
715
                }
Lines 1025-1031 Link Here
1025
                            break;
1020
                            break;
1026
                        }
1021
                        }
1027
                    }
1022
                    }
1028
                    
1023
1029
                    // If its not in the list we parse the {m,n}
1024
                    // If it's not in the list we parse the {m,n}
1030
                    if (!found)
1025
                    if (!found)
1031
                    {
1026
                    {
Lines 1038-1054 Link Here
1038
                        bracketEnd[brackets] = idx;
1033
                        bracketEnd[brackets] = idx;
1039
                        i = brackets++;
1034
                        i = brackets++;
1040
                    }
1035
                    }
1041
                    
1036
1042
                    // If there's a min, rewind stream and reparse
1037
                    // Process min first
1043
                    if (--bracketMin[i] > 0)
1038
                    if (bracketMin[i]-- > 0)
1044
                    {
1039
                    {
1045
                        // Rewind stream and run it through again
1040
                        if (bracketMin[i] > 0 || bracketOpt[i] != 0) {
1046
                        idx = idxBeforeTerminal;
1041
                            // Rewind stream and run it through again - more matchers coming
1042
                            idx = idxBeforeTerminal;
1043
                        } else {
1044
                            // Bug #1030: No optional matches - no need to rewind
1045
                            idx = bracketEnd[i];
1046
                        }
1047
                        break;
1047
                        break;
1048
                    }
1048
                    }
1049
                    
1049
1050
                    // Do the right thing for maximum ({m,})
1050
                    // Do the right thing for maximum ({m,})
1051
                    if (bracketOpt[i] == bracketFinished)
1051
                    if (bracketOpt[i] == bracketUnbounded)
1052
                    {
1052
                    {
1053
                        // Drop through now and closure expression.
1053
                        // Drop through now and closure expression.
1054
                        // We are done with the {m,} expr, so skip rest
1054
                        // We are done with the {m,} expr, so skip rest
Lines 1057-1093 Link Here
1057
                        idx = bracketEnd[i];
1057
                        idx = bracketEnd[i];
1058
                    }
1058
                    }
1059
                    else
1059
                    else
1060
                        if (bracketOpt[i] == bracketUnbounded)
1060
                        if (bracketOpt[i]-- > 0)
1061
                        {
1061
                        {
1062
                            idx = idxBeforeTerminal;
1062
                            if (bracketOpt[i] > 0)
1063
                            bracketOpt[i] = bracketFinished;
1064
                            break;
1065
                        }
1066
                        else
1067
                            if (bracketOpt[i]-- > 0)
1068
                            {
1063
                            {
1069
                                // Drop through to optionally close and then 'play it again sam!'
1064
                                // More optional matchers - 'play it again sam!'
1070
                                idx = idxBeforeTerminal;
1065
                                idx = idxBeforeTerminal;
1071
                                closureType = '?';
1066
                            } else {
1072
                            }
1067
                                // Bug #1030: We are done - this one is last and optional
1073
                            else
1074
                            {
1075
                                // We are done. skip the rest of {m,n} expr
1076
                                idx = bracketEnd[i];
1068
                                idx = bracketEnd[i];
1077
                                break;
1078
                            }
1069
                            }
1070
                            // Drop through to optionally close
1071
                            closureType = '?';
1072
                        }
1073
                        else
1074
                        {
1075
                            // Rollback terminal - neither min nor opt matchers present
1076
                            lenInstruction = ret;
1077
                            node(RE.OP_NOTHING, 0);
1078
1079
                            // We are done. skip the rest of {m,n} expr
1080
                            idx = bracketEnd[i];
1081
                            break;
1082
                        }
1079
                }
1083
                }
1080
                
1084
1081
                // Fall through!
1085
                // Fall through!
1082
                
1086
1083
                case '?':
1087
                case '?':
1084
                case '*':
1088
                case '*':
1085
                    
1089
1086
                    if (!greedy)
1090
                    if (!greedy)
1087
                    {
1091
                    {
1088
                        break;
1092
                        break;
1089
                    }
1093
                    }
1090
                    
1094
1091
                    if (closureType == '?')
1095
                    if (closureType == '?')
1092
                    {
1096
                    {
1093
                        // X? is compiled as (X|)
1097
                        // X? is compiled as (X|)
Lines 1097-1103 Link Here
1097
                        setNextOfEnd(ret, nothing);                       // point (second) branch to OP_NOTHING
1101
                        setNextOfEnd(ret, nothing);                       // point (second) branch to OP_NOTHING
1098
                        setNextOfEnd(ret + RE.nodeSize, nothing);         // point the end of X to OP_NOTHING node
1102
                        setNextOfEnd(ret + RE.nodeSize, nothing);         // point the end of X to OP_NOTHING node
1099
                    }
1103
                    }
1100
                    
1104
1101
                    if (closureType == '*')
1105
                    if (closureType == '*')
1102
                    {
1106
                    {
1103
                        // X* is compiled as (X{gotoX}|)
1107
                        // X* is compiled as (X{gotoX}|)
Lines 1109-1115 Link Here
1109
                        setNextOfEnd(ret, node(RE.OP_NOTHING, 0));                // OP_NOTHING
1113
                        setNextOfEnd(ret, node(RE.OP_NOTHING, 0));                // OP_NOTHING
1110
                    }
1114
                    }
1111
                    break;
1115
                    break;
1112
                    
1116
1113
                case '+':
1117
                case '+':
1114
                {
1118
                {
1115
                    // X+ is compiled as X({gotoX}|)
1119
                    // X+ is compiled as X({gotoX}|)
Lines 1134-1141 Link Here
1134
                case '?':
1138
                case '?':
1135
                    nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
1139
                    nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
1136
                    break;
1140
                    break;
1137
                    
1141
1138
                case '*':       
1142
                case '*':
1139
                    nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
1143
                    nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
1140
                    break;
1144
                    break;
1141
1145
Lines 1145-1151 Link Here
1145
            }
1149
            }
1146
1150
1147
            // Point to the expr after the closure
1151
            // Point to the expr after the closure
1148
            setNextOfEnd(ret, lenInstruction);          
1152
            setNextOfEnd(ret, lenInstruction);
1149
        }
1153
        }
1150
        return ret;
1154
        return ret;
1151
    }
1155
    }
Lines 1347-1353 Link Here
1347
        int[] minRange = new int[size];     // Range minima
1351
        int[] minRange = new int[size];     // Range minima
1348
        int[] maxRange = new int[size];     // Range maxima
1352
        int[] maxRange = new int[size];     // Range maxima
1349
        int num = 0;                        // Number of range array elements in use
1353
        int num = 0;                        // Number of range array elements in use
1350
        
1354
1351
        /**
1355
        /**
1352
         * Deletes the range at a given index from the range lists
1356
         * Deletes the range at a given index from the range lists
1353
         * @param index Index of range to delete from minRange and maxRange arrays.
1357
         * @param index Index of range to delete from minRange and maxRange arrays.
Lines 1359-1365 Link Here
1359
            {
1363
            {
1360
                return;
1364
                return;
1361
            }
1365
            }
1362
            
1366
1363
            // Move elements down
1367
            // Move elements down
1364
            while (index++ < num)
1368
            while (index++ < num)
1365
            {
1369
            {
Lines 1369-1379 Link Here
1369
                    maxRange[index-1] = maxRange[index];
1373
                    maxRange[index-1] = maxRange[index];
1370
                }
1374
                }
1371
            }
1375
            }
1372
            
1376
1373
            // One less element now
1377
            // One less element now
1374
            num--;
1378
            num--;
1375
        }
1379
        }
1376
        
1380
1377
        /**
1381
        /**
1378
         * Merges a range into the range list, coalescing ranges if possible.
1382
         * Merges a range into the range list, coalescing ranges if possible.
1379
         * @param min Minimum end of range
1383
         * @param min Minimum end of range
Lines 1389-1395 Link Here
1389
                {
1393
                {
1390
                    return;
1394
                    return;
1391
                }
1395
                }
1392
                
1396
1393
                // Min-max subsumes minRange[i]-maxRange[i]
1397
                // Min-max subsumes minRange[i]-maxRange[i]
1394
                else if (min <= minRange[i] && max >= maxRange[i])
1398
                else if (min <= minRange[i] && max >= maxRange[i])
1395
                {
1399
                {
Lines 1397-1403 Link Here
1397
                    merge(min, max);
1401
                    merge(min, max);
1398
                    return;
1402
                    return;
1399
                }
1403
                }
1400
                
1404
1401
                // Min is in the range, but max is outside
1405
                // Min is in the range, but max is outside
1402
                else if (min >= minRange[i] && min <= maxRange[i])
1406
                else if (min >= minRange[i] && min <= maxRange[i])
1403
                {
1407
                {
Lines 1406-1412 Link Here
1406
                    merge(min, max);
1410
                    merge(min, max);
1407
                    return;
1411
                    return;
1408
                }
1412
                }
1409
                
1413
1410
                // Max is in the range, but min is outside
1414
                // Max is in the range, but min is outside
1411
                else if (max >= minRange[i] && max <= maxRange[i])
1415
                else if (max >= minRange[i] && max <= maxRange[i])
1412
                {
1416
                {
Lines 1416-1422 Link Here
1416
                    return;
1420
                    return;
1417
                }
1421
                }
1418
            }
1422
            }
1419
            
1423
1420
            // Must not overlap any other ranges
1424
            // Must not overlap any other ranges
1421
            if (num >= size)
1425
            if (num >= size)
1422
            {
1426
            {
Lines 1432-1438 Link Here
1432
            maxRange[num] = max;
1436
            maxRange[num] = max;
1433
            num++;
1437
            num++;
1434
        }
1438
        }
1435
        
1439
1436
        /**
1440
        /**
1437
         * Removes a range by deleting or shrinking all other ranges
1441
         * Removes a range by deleting or shrinking all other ranges
1438
         * @param min Minimum end of range
1442
         * @param min Minimum end of range
Lines 1450-1456 Link Here
1450
                    i--;
1454
                    i--;
1451
                    return;
1455
                    return;
1452
                }
1456
                }
1453
                
1457
1454
                // min-max is subsumed by minRange[i]-maxRange[i]
1458
                // min-max is subsumed by minRange[i]-maxRange[i]
1455
                else if (min >= minRange[i] && max <= maxRange[i])
1459
                else if (min >= minRange[i] && max <= maxRange[i])
1456
                {
1460
                {
Lines 1467-1480 Link Here
1467
                    }
1471
                    }
1468
                    return;
1472
                    return;
1469
                }
1473
                }
1470
                
1474
1471
                // minRange is in the range, but maxRange is outside
1475
                // minRange is in the range, but maxRange is outside
1472
                else if (minRange[i] >= min && minRange[i] <= max)
1476
                else if (minRange[i] >= min && minRange[i] <= max)
1473
                {
1477
                {
1474
                    minRange[i] = max + 1;
1478
                    minRange[i] = max + 1;
1475
                    return;
1479
                    return;
1476
                }
1480
                }
1477
                
1481
1478
                // maxRange is in the range, but minRange is outside
1482
                // maxRange is in the range, but minRange is outside
1479
                else if (maxRange[i] >= min && maxRange[i] <= max)
1483
                else if (maxRange[i] >= min && maxRange[i] <= max)
1480
                {
1484
                {
Lines 1483-1489 Link Here
1483
                }
1487
                }
1484
            }
1488
            }
1485
        }
1489
        }
1486
        
1490
1487
        /**
1491
        /**
1488
         * Includes (or excludes) the range from min to max, inclusive.
1492
         * Includes (or excludes) the range from min to max, inclusive.
1489
         * @param min Minimum end of range
1493
         * @param min Minimum end of range
Lines 1501-1507 Link Here
1501
                remove(min, max);
1505
                remove(min, max);
1502
            }
1506
            }
1503
        }
1507
        }
1504
        
1508
1505
        /**
1509
        /**
1506
         * Includes a range with the same min and max
1510
         * Includes a range with the same min and max
1507
         * @param minmax Minimum and maximum end of range (inclusive)
1511
         * @param minmax Minimum and maximum end of range (inclusive)
(-)src/java/org/apache/regexp/REProgram.java (-3 / +2 lines)
Lines 57-64 Link Here
57
 *
57
 *
58
 */ 
58
 */ 
59
59
60
import org.apache.regexp.RE;
60
import java.io.Serializable;
61
import java.util.Hashtable;
62
61
63
/**
62
/**
64
 * A class that holds compiled regular expressions.  This is exposed mainly
63
 * A class that holds compiled regular expressions.  This is exposed mainly
Lines 72-78 Link Here
72
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
71
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
73
 * @version $Id: REProgram.java,v 1.2 2002/12/03 20:57:52 jon Exp $
72
 * @version $Id: REProgram.java,v 1.2 2002/12/03 20:57:52 jon Exp $
74
 */
73
 */
75
public class REProgram
74
public class REProgram implements Serializable
76
{
75
{
77
    static final int OPT_HASBACKREFS = 1;
76
    static final int OPT_HASBACKREFS = 1;
78
77
(-)src/java/org/apache/regexp/RETest.java (-14 / +79 lines)
Lines 62-67 Link Here
62
import java.io.InputStreamReader;
62
import java.io.InputStreamReader;
63
import java.io.PrintWriter;
63
import java.io.PrintWriter;
64
import java.io.File;
64
import java.io.File;
65
import java.io.ByteArrayOutputStream;
66
import java.io.ObjectOutputStream;
67
import java.io.ByteArrayInputStream;
68
import java.io.ObjectInputStream;
65
69
66
/**
70
/**
67
 * Data driven (and optionally interactive) testing harness to exercise regular
71
 * Data driven (and optionally interactive) testing harness to exercise regular
Lines 88-95 Link Here
88
     * Main program entrypoint.  If an argument is given, it will be compiled
92
     * Main program entrypoint.  If an argument is given, it will be compiled
89
     * and interactive matching will ensue.  If no argument is given, the
93
     * and interactive matching will ensue.  If no argument is given, the
90
     * file RETest.txt will be used as automated testing input.
94
     * file RETest.txt will be used as automated testing input.
91
     * @param arg Command line arguments (optional regular expression)
95
     * @param args Command line arguments (optional regular expression)
92
    */
96
     */
93
    public static void main(String[] args)
97
    public static void main(String[] args)
94
    {
98
    {
95
        try
99
        try
Lines 104-112 Link Here
104
108
105
    /**
109
    /**
106
     * Testing entrypoint.
110
     * Testing entrypoint.
107
     * @param arg Command line arguments
111
     * @param args Command line arguments
108
     * @exception Exception thrown in case of error
112
     * @exception Exception thrown in case of error
109
    */
113
     */
110
    public static boolean test( String[] args ) throws Exception
114
    public static boolean test( String[] args ) throws Exception
111
    {
115
    {
112
        RETest test = new RETest();
116
        RETest test = new RETest();
Lines 132-138 Link Here
132
136
133
    /**
137
    /**
134
     * Constructor
138
     * Constructor
135
    */
139
     */
136
    public RETest()
140
    public RETest()
137
    {
141
    {
138
    }
142
    }
Lines 140-146 Link Here
140
    /**
144
    /**
141
     * Compile and test matching against a single expression
145
     * Compile and test matching against a single expression
142
     * @param expr Expression to compile and test
146
     * @param expr Expression to compile and test
143
    */
147
     */
144
    void runInteractiveTests(String expr)
148
    void runInteractiveTests(String expr)
145
    {
149
    {
146
        try
150
        try
Lines 198-204 Link Here
198
    /**
202
    /**
199
     * Exit with a fatal error.
203
     * Exit with a fatal error.
200
     * @param s Last famous words before exiting
204
     * @param s Last famous words before exiting
201
    */
205
     */
202
    void die(String s)
206
    void die(String s)
203
    {
207
    {
204
        say("FATAL ERROR: " + s);
208
        say("FATAL ERROR: " + s);
Lines 230-236 Link Here
230
    /**
234
    /**
231
     * Show a success
235
     * Show a success
232
     * @param s Success story
236
     * @param s Success story
233
    */
237
     */
234
    void success(String s)
238
    void success(String s)
235
    {
239
    {
236
        if (showSuccesses)
240
        if (showSuccesses)
Lines 243-249 Link Here
243
    /**
247
    /**
244
     * Say something to standard out
248
     * Say something to standard out
245
     * @param s What to say
249
     * @param s What to say
246
    */
250
     */
247
    void say(String s)
251
    void say(String s)
248
    {
252
    {
249
        System.out.println (s);
253
        System.out.println (s);
Lines 251-257 Link Here
251
255
252
    /**
256
    /**
253
     * Show an expression
257
     * Show an expression
254
    */
258
     */
255
    void show()
259
    void show()
256
    {
260
    {
257
        say("" + NEW_LINE + "-----------------------" + NEW_LINE + "");
261
        say("" + NEW_LINE + "-----------------------" + NEW_LINE + "");
Lines 261-267 Link Here
261
    /**
265
    /**
262
     * Dump parenthesized subexpressions found by a regular expression matcher object
266
     * Dump parenthesized subexpressions found by a regular expression matcher object
263
     * @param r Matcher object with results to show
267
     * @param r Matcher object with results to show
264
    */
268
     */
265
    void showParens(RE r)
269
    void showParens(RE r)
266
    {
270
    {
267
        // Loop through each paren
271
        // Loop through each paren
Lines 297-304 Link Here
297
301
298
    /**
302
    /**
299
     * Run automated tests in RETest.txt file (from Perl 4.0 test battery)
303
     * Run automated tests in RETest.txt file (from Perl 4.0 test battery)
300
    * @exception Exception thrown in case of error
304
     * @exception Exception thrown in case of error
301
    */
305
     */
302
    void runAutomatedTests(String testDocument) throws Exception
306
    void runAutomatedTests(String testDocument) throws Exception
303
    {
307
    {
304
        long ms = System.currentTimeMillis();
308
        long ms = System.currentTimeMillis();
Lines 328-333 Link Here
328
        String s1 = r.subst("aaaabfooaaabgarplyaaabwackyb", "-");
332
        String s1 = r.subst("aaaabfooaaabgarplyaaabwackyb", "-");
329
        System.out.println ("s = " + s1);
333
        System.out.println ("s = " + s1);
330
334
335
        // Some unit tests
336
        runAutomatedTests();
337
331
        // Test from script file
338
        // Test from script file
332
        File testInput = new File(testDocument);
339
        File testInput = new File(testDocument);
333
        if (! testInput.exists())
340
        if (! testInput.exists())
Lines 458-464 Link Here
458
                                say("   Paren " + p + " : " + r.getParen(p));
465
                                say("   Paren " + p + " : " + r.getParen(p));
459
466
460
                                // Compare expected result with actual
467
                                // Compare expected result with actual
461
                                if (!register.equals(r.getParen(p)))
468
                                if (!register.equals(r.getParen(p)) && register.length() > 0 && r.getParen(p) != null)
462
                                {
469
                                {
463
                                    // Register isn't what it was supposed to be
470
                                    // Register isn't what it was supposed to be
464
                                    fail("Register " + p + " should be = \"" + register + "\", but is \"" + r.getParen(p) + "\" instead.");
471
                                    fail("Register " + p + " should be = \"" + register + "\", but is \"" + r.getParen(p) + "\" instead.");
Lines 521-525 Link Here
521
528
522
        // Print final results
529
        // Print final results
523
        System.out.println( NEW_LINE + "Tests complete.  " + n + " tests, " + failures + " failure(s).");
530
        System.out.println( NEW_LINE + "Tests complete.  " + n + " tests, " + failures + " failure(s).");
531
    }
532
533
    /**
534
     * Run automated unit test
535
     * @exception Exception thrown in case of error
536
     */
537
    void runAutomatedTests() throws Exception
538
    {
539
        // Serialization test 1: Compile regexp and serialize/deserialize it
540
        RE r = new RE("(a*)b");
541
        say("Serialized/deserialized (a*)b");
542
        ByteArrayOutputStream out = new ByteArrayOutputStream(128);
543
        new ObjectOutputStream(out).writeObject(r);
544
        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
545
        r = (RE)new ObjectInputStream(in).readObject();
546
        if (!r.match("aaab")) {
547
            fail("Did not match 'aaab' with deserialized RE.");
548
        }
549
        say("aaaab = true");
550
        showParens(r);
551
552
        // Serialization test 2: serialize/deserialize used regexp
553
        out.reset();
554
        say("Deserialized (a*)b");
555
        new ObjectOutputStream(out).writeObject(r);
556
        in = new ByteArrayInputStream(out.toByteArray());
557
        r = (RE)new ObjectInputStream(in).readObject();
558
        if (r.getParenCount() != 0) {
559
            fail("Has parens after deserialization.");
560
        }
561
        if (!r.match("aaab")) {
562
            fail("Did not match 'aaab' with deserialized RE.");
563
        }
564
        say("aaaab = true");
565
        showParens(r);
566
567
        // Test MATCH_CASEINDEPENDENT
568
        r = new RE("abc(\\w*)");
569
        say("MATCH_CASEINDEPENDENT abc(\\w*)");
570
        r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
571
        say("abc(d*)");
572
        if (!r.match("abcddd")) {
573
            fail("Did not match 'abcddd'.");
574
        }
575
        say("abcddd = true");
576
        showParens(r);
577
578
        if (!r.match("aBcDDdd")) {
579
            fail("Did not match 'aBcDDdd'.");
580
        }
581
        say("aBcDDdd = true");
582
        showParens(r);
583
584
        if (!r.match("ABCDDDDD")) {
585
            fail("Did not match 'ABCDDDDD'.");
586
        }
587
        say("ABCDDDDD = true");
588
        showParens(r);
524
    }
589
    }
525
}
590
}
(-)xdocs/RETest.txt (-1 / +213 lines)
Lines 1066-1069 Link Here
1066
[a-z]{0,3}
1066
[a-z]{0,3}
1067
123abcdefg123
1067
123abcdefg123
1068
YES
1068
YES
1069
abc
1069
1070
#175
1071
ab{0,1}a
1072
aa
1073
YES
1074
aa
1075
1076
#176
1077
ab{0,1}a
1078
aba
1079
YES
1080
aba
1081
1082
#177
1083
ab{0,1}a
1084
abba
1085
NO
1086
1087
#178
1088
ab{0,2}a
1089
aa
1090
YES
1091
aa
1092
1093
#179
1094
ab{0,2}a
1095
aba
1096
YES
1097
aba
1098
1099
#180
1100
ab{0,2}a
1101
abba
1102
YES
1103
abba
1104
1105
#181
1106
ab{0,2}a
1107
abbba
1108
NO
1109
1110
#182
1111
ab{1,1}a
1112
aa
1113
NO
1114
1115
#183
1116
ab{1,1}a
1117
aba
1118
YES
1119
aba
1120
1121
#184
1122
ab{1,1}a
1123
abba
1124
NO
1125
1126
#185
1127
ab{1,2}a
1128
aa
1129
NO
1130
1131
#186
1132
ab{1,2}a
1133
aba
1134
YES
1135
aba
1136
1137
#187
1138
ab{1,2}a
1139
abba
1140
YES
1141
abba
1142
1143
#188
1144
ab{1,2}a
1145
abbba
1146
NO
1147
1148
#189
1149
ab{0,}a
1150
aa
1151
YES
1152
aa
1153
1154
#190
1155
ab{0,}a
1156
aba
1157
YES
1158
aba
1159
1160
#191
1161
ab{0,}a
1162
abba
1163
YES
1164
abba
1165
1166
#192
1167
ab{1,}a
1168
aa
1169
NO
1170
1171
#193
1172
ab{1,}a
1173
aba
1174
YES
1175
aba
1176
1177
#194
1178
ab{1,}a
1179
abba
1180
YES
1181
abba
1182
1183
#195
1184
ab{1}a
1185
aa
1186
NO
1187
1188
#196
1189
ab{1}a
1190
aba
1191
YES
1192
aba
1193
1194
#197
1195
ab{1}a
1196
abba
1197
NO
1198
1199
#198
1200
ab{0}a
1201
aa
1202
YES
1203
aa
1204
1205
#199
1206
ab{0}a
1207
aba
1208
NO
1209
1210
#200
1211
ab{2}a
1212
aa
1213
NO
1214
1215
#201
1216
ab{2}a
1217
aba
1218
NO
1219
1220
#202
1221
ab{2}a
1222
abba
1223
YES
1224
abba
1225
1226
#203
1227
ab{2}a
1228
abbba
1229
NO
1230
1231
#204
1232
[ \-]
1233
 -
1234
YES
1235
 -
1236
1237
#205
1238
[a-z0-9\.\-]+
1239
{regexp-1.2}
1240
YES
1241
regexp-1.2
1242
1243
#206
1244
[a-z0-9\-\.]+
1245
{regexp-1.2}
1246
YES
1247
regexp-1.2
1248
1249
#207
1250
[a-z\-0-9\.]+
1251
{regexp-1.2}
1252
YES
1253
regexp-1.2
1254
1255
#208
1256
\w+
1257
a_b
1258
YES
1259
a_b
1260
1261
#209
1262
([0123])??((((1st)|(2nd))|(3rd))|(\dth))
1263
1st
1264
YES
1265
1st
1266
1267
1st
1268
1st
1269
1st
1270
1st
1271
1272
#210
1273
[^\s\]'<>(),;:\.\[]
1274
-
1275
YES
1276
-
1277
1278
#211
1279
^\(?(\d{3})\)?[\- ]?(\d{3})[\- ]?(\d{4})$
1280
(425) 576+1202
1281
NO

Return to bug 19329