Lines 2-11
Link Here
|
2 |
|
2 |
|
3 |
/* |
3 |
/* |
4 |
* ==================================================================== |
4 |
* ==================================================================== |
5 |
* |
5 |
* |
6 |
* The Apache Software License, Version 1.1 |
6 |
* The Apache Software License, Version 1.1 |
7 |
* |
7 |
* |
8 |
* Copyright (c) 1999 The Apache Software Foundation. All rights |
8 |
* Copyright (c) 1999 The Apache Software Foundation. All rights |
9 |
* reserved. |
9 |
* reserved. |
10 |
* |
10 |
* |
11 |
* Redistribution and use in source and binary forms, with or without |
11 |
* Redistribution and use in source and binary forms, with or without |
Lines 13-19
Link Here
|
13 |
* are met: |
13 |
* are met: |
14 |
* |
14 |
* |
15 |
* 1. Redistributions of source code must retain the above copyright |
15 |
* 1. Redistributions of source code must retain the above copyright |
16 |
* notice, this list of conditions and the following disclaimer. |
16 |
* notice, this list of conditions and the following disclaimer. |
17 |
* |
17 |
* |
18 |
* 2. Redistributions in binary form must reproduce the above copyright |
18 |
* 2. Redistributions in binary form must reproduce the above copyright |
19 |
* notice, this list of conditions and the following disclaimer in |
19 |
* notice, this list of conditions and the following disclaimer in |
Lines 21-35
Link Here
|
21 |
* distribution. |
21 |
* distribution. |
22 |
* |
22 |
* |
23 |
* 3. The end-user documentation included with the redistribution, if |
23 |
* 3. The end-user documentation included with the redistribution, if |
24 |
* any, must include the following acknowlegement: |
24 |
* any, must include the following acknowlegement: |
25 |
* "This product includes software developed by the |
25 |
* "This product includes software developed by the |
26 |
* Apache Software Foundation (http://www.apache.org/)." |
26 |
* Apache Software Foundation (http://www.apache.org/)." |
27 |
* Alternately, this acknowlegement may appear in the software itself, |
27 |
* Alternately, this acknowlegement may appear in the software itself, |
28 |
* if and wherever such third-party acknowlegements normally appear. |
28 |
* if and wherever such third-party acknowlegements normally appear. |
29 |
* |
29 |
* |
30 |
* 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software |
30 |
* 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software |
31 |
* Foundation" must not be used to endorse or promote products derived |
31 |
* Foundation" must not be used to endorse or promote products derived |
32 |
* from this software without prior written permission. For written |
32 |
* from this software without prior written permission. For written |
33 |
* permission, please contact apache@apache.org. |
33 |
* permission, please contact apache@apache.org. |
34 |
* |
34 |
* |
35 |
* 5. Products derived from this software may not be called "Apache" |
35 |
* 5. Products derived from this software may not be called "Apache" |
Lines 55-61
Link Here
|
55 |
* information on the Apache Software Foundation, please see |
55 |
* information on the Apache Software Foundation, please see |
56 |
* <http://www.apache.org/>. |
56 |
* <http://www.apache.org/>. |
57 |
* |
57 |
* |
58 |
*/ |
58 |
*/ |
59 |
|
59 |
|
60 |
import org.apache.regexp.RE; |
60 |
import org.apache.regexp.RE; |
61 |
import java.util.Hashtable; |
61 |
import java.util.Hashtable; |
Lines 100-106
Link Here
|
100 |
// {m,n} stacks |
100 |
// {m,n} stacks |
101 |
static final int maxBrackets = 10; // Maximum number of bracket pairs |
101 |
static final int maxBrackets = 10; // Maximum number of bracket pairs |
102 |
static final int bracketUnbounded = -1; // Unbounded value |
102 |
static final int bracketUnbounded = -1; // Unbounded value |
103 |
static final int bracketFinished = -2; // Unbounded value |
|
|
104 |
int brackets = 0; // Number of bracket sets |
103 |
int brackets = 0; // Number of bracket sets |
105 |
int[] bracketStart = null; // Starting point |
104 |
int[] bracketStart = null; // Starting point |
106 |
int[] bracketEnd = null; // Ending point |
105 |
int[] bracketEnd = null; // Ending point |
Lines 373-389
Link Here
|
373 |
try |
372 |
try |
374 |
{ |
373 |
{ |
375 |
bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets]; |
374 |
bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets]; |
376 |
if (bracketMin[brackets] < 1) |
|
|
377 |
{ |
378 |
bracketOpt[brackets]--; |
379 |
} |
380 |
} |
375 |
} |
381 |
catch (NumberFormatException e) |
376 |
catch (NumberFormatException e) |
382 |
{ |
377 |
{ |
383 |
syntaxError("Expected valid number"); |
378 |
syntaxError("Expected valid number"); |
384 |
} |
379 |
} |
385 |
|
380 |
|
386 |
// Optional repetitions must be > 0 |
381 |
// Optional repetitions must be >= 0 |
387 |
if (bracketOpt[brackets] < 0) |
382 |
if (bracketOpt[brackets] < 0) |
388 |
{ |
383 |
{ |
389 |
syntaxError("Bad range"); |
384 |
syntaxError("Bad range"); |
Lines 461-467
Link Here
|
461 |
c = Character.toLowerCase(c); |
456 |
c = Character.toLowerCase(c); |
462 |
if (c >= 'a' && c <= 'f') |
457 |
if (c >= 'a' && c <= 'f') |
463 |
{ |
458 |
{ |
464 |
// Compute new value |
459 |
// Compute new value |
465 |
val = (val << 4) + (c - 'a') + 10; |
460 |
val = (val << 4) + (c - 'a') + 10; |
466 |
} |
461 |
} |
467 |
else |
462 |
else |
Lines 555-561
Link Here
|
555 |
{ |
550 |
{ |
556 |
idx++; |
551 |
idx++; |
557 |
} |
552 |
} |
558 |
|
553 |
|
559 |
// Should be a ":]" to terminate the POSIX character class |
554 |
// Should be a ":]" to terminate the POSIX character class |
560 |
if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']') |
555 |
if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']') |
561 |
{ |
556 |
{ |
Lines 714-720
Link Here
|
714 |
else |
709 |
else |
715 |
{ |
710 |
{ |
716 |
// If simple character and not start of range, include it |
711 |
// If simple character and not start of range, include it |
717 |
if ((idx + 1) >= len || pattern.charAt(idx + 1) != '-') |
712 |
if (idx >= len || pattern.charAt(idx) != '-') |
718 |
{ |
713 |
{ |
719 |
range.include(simpleChar, include); |
714 |
range.include(simpleChar, include); |
720 |
} |
715 |
} |
Lines 1025-1031
Link Here
|
1025 |
break; |
1020 |
break; |
1026 |
} |
1021 |
} |
1027 |
} |
1022 |
} |
1028 |
|
1023 |
|
1029 |
// If its not in the list we parse the {m,n} |
1024 |
// If its not in the list we parse the {m,n} |
1030 |
if (!found) |
1025 |
if (!found) |
1031 |
{ |
1026 |
{ |
Lines 1038-1054
Link Here
|
1038 |
bracketEnd[brackets] = idx; |
1033 |
bracketEnd[brackets] = idx; |
1039 |
i = brackets++; |
1034 |
i = brackets++; |
1040 |
} |
1035 |
} |
1041 |
|
1036 |
|
1042 |
// If there's a min, rewind stream and reparse |
1037 |
// Process min first |
1043 |
if (--bracketMin[i] > 0) |
1038 |
if (bracketMin[i]-- > 0) |
1044 |
{ |
1039 |
{ |
1045 |
// Rewind stream and run it through again |
1040 |
if (bracketMin[i] > 0 || bracketOpt[i] != 0) { |
1046 |
idx = idxBeforeTerminal; |
1041 |
// Rewind stream and run it through again - more matchers coming |
|
|
1042 |
idx = idxBeforeTerminal; |
1043 |
} else { |
1044 |
// Bug #1030: No optinal matches - no need to rewind |
1045 |
idx = bracketEnd[i]; |
1046 |
} |
1047 |
break; |
1047 |
break; |
1048 |
} |
1048 |
} |
1049 |
|
1049 |
|
1050 |
// Do the right thing for maximum ({m,}) |
1050 |
// Do the right thing for maximum ({m,}) |
1051 |
if (bracketOpt[i] == bracketFinished) |
1051 |
if (bracketOpt[i] == bracketUnbounded) |
1052 |
{ |
1052 |
{ |
1053 |
// Drop through now and closure expression. |
1053 |
// Drop through now and closure expression. |
1054 |
// We are done with the {m,} expr, so skip rest |
1054 |
// We are done with the {m,} expr, so skip rest |
Lines 1057-1093
Link Here
|
1057 |
idx = bracketEnd[i]; |
1057 |
idx = bracketEnd[i]; |
1058 |
} |
1058 |
} |
1059 |
else |
1059 |
else |
1060 |
if (bracketOpt[i] == bracketUnbounded) |
1060 |
if (bracketOpt[i]-- > 0) |
1061 |
{ |
1061 |
{ |
1062 |
idx = idxBeforeTerminal; |
1062 |
if (bracketOpt[i] > 0) |
1063 |
bracketOpt[i] = bracketFinished; |
|
|
1064 |
break; |
1065 |
} |
1066 |
else |
1067 |
if (bracketOpt[i]-- > 0) |
1068 |
{ |
1063 |
{ |
1069 |
// Drop through to optionally close and then 'play it again sam!' |
1064 |
// More optional matchers - 'play it again sam!' |
1070 |
idx = idxBeforeTerminal; |
1065 |
idx = idxBeforeTerminal; |
1071 |
closureType = '?'; |
1066 |
} else { |
1072 |
} |
1067 |
// Bug #1030: We are done - this one is last and optional |
1073 |
else |
|
|
1074 |
{ |
1075 |
// We are done. skip the rest of {m,n} expr |
1076 |
idx = bracketEnd[i]; |
1068 |
idx = bracketEnd[i]; |
1077 |
break; |
|
|
1078 |
} |
1069 |
} |
|
|
1070 |
// Drop through to optionally close |
1071 |
closureType = '?'; |
1072 |
} |
1073 |
else |
1074 |
{ |
1075 |
// Rollback terminal - neither min nor opt matchers present |
1076 |
lenInstruction = ret; |
1077 |
node(RE.OP_NOTHING, 0); |
1078 |
|
1079 |
// We are done. skip the rest of {m,n} expr |
1080 |
idx = bracketEnd[i]; |
1081 |
break; |
1082 |
} |
1079 |
} |
1083 |
} |
1080 |
|
1084 |
|
1081 |
// Fall through! |
1085 |
// Fall through! |
1082 |
|
1086 |
|
1083 |
case '?': |
1087 |
case '?': |
1084 |
case '*': |
1088 |
case '*': |
1085 |
|
1089 |
|
1086 |
if (!greedy) |
1090 |
if (!greedy) |
1087 |
{ |
1091 |
{ |
1088 |
break; |
1092 |
break; |
1089 |
} |
1093 |
} |
1090 |
|
1094 |
|
1091 |
if (closureType == '?') |
1095 |
if (closureType == '?') |
1092 |
{ |
1096 |
{ |
1093 |
// X? is compiled as (X|) |
1097 |
// X? is compiled as (X|) |
Lines 1097-1103
Link Here
|
1097 |
setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING |
1101 |
setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING |
1098 |
setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node |
1102 |
setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node |
1099 |
} |
1103 |
} |
1100 |
|
1104 |
|
1101 |
if (closureType == '*') |
1105 |
if (closureType == '*') |
1102 |
{ |
1106 |
{ |
1103 |
// X* is compiled as (X{gotoX}|) |
1107 |
// X* is compiled as (X{gotoX}|) |
Lines 1109-1115
Link Here
|
1109 |
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING |
1113 |
setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING |
1110 |
} |
1114 |
} |
1111 |
break; |
1115 |
break; |
1112 |
|
1116 |
|
1113 |
case '+': |
1117 |
case '+': |
1114 |
{ |
1118 |
{ |
1115 |
// X+ is compiled as X({gotoX}|) |
1119 |
// X+ is compiled as X({gotoX}|) |
Lines 1134-1141
Link Here
|
1134 |
case '?': |
1138 |
case '?': |
1135 |
nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); |
1139 |
nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); |
1136 |
break; |
1140 |
break; |
1137 |
|
1141 |
|
1138 |
case '*': |
1142 |
case '*': |
1139 |
nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); |
1143 |
nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); |
1140 |
break; |
1144 |
break; |
1141 |
|
1145 |
|
Lines 1145-1151
Link Here
|
1145 |
} |
1149 |
} |
1146 |
|
1150 |
|
1147 |
// Point to the expr after the closure |
1151 |
// Point to the expr after the closure |
1148 |
setNextOfEnd(ret, lenInstruction); |
1152 |
setNextOfEnd(ret, lenInstruction); |
1149 |
} |
1153 |
} |
1150 |
return ret; |
1154 |
return ret; |
1151 |
} |
1155 |
} |
Lines 1347-1353
Link Here
|
1347 |
int[] minRange = new int[size]; // Range minima |
1351 |
int[] minRange = new int[size]; // Range minima |
1348 |
int[] maxRange = new int[size]; // Range maxima |
1352 |
int[] maxRange = new int[size]; // Range maxima |
1349 |
int num = 0; // Number of range array elements in use |
1353 |
int num = 0; // Number of range array elements in use |
1350 |
|
1354 |
|
1351 |
/** |
1355 |
/** |
1352 |
* Deletes the range at a given index from the range lists |
1356 |
* Deletes the range at a given index from the range lists |
1353 |
* @param index Index of range to delete from minRange and maxRange arrays. |
1357 |
* @param index Index of range to delete from minRange and maxRange arrays. |
Lines 1359-1365
Link Here
|
1359 |
{ |
1363 |
{ |
1360 |
return; |
1364 |
return; |
1361 |
} |
1365 |
} |
1362 |
|
1366 |
|
1363 |
// Move elements down |
1367 |
// Move elements down |
1364 |
while (index++ < num) |
1368 |
while (index++ < num) |
1365 |
{ |
1369 |
{ |
Lines 1369-1379
Link Here
|
1369 |
maxRange[index-1] = maxRange[index]; |
1373 |
maxRange[index-1] = maxRange[index]; |
1370 |
} |
1374 |
} |
1371 |
} |
1375 |
} |
1372 |
|
1376 |
|
1373 |
// One less element now |
1377 |
// One less element now |
1374 |
num--; |
1378 |
num--; |
1375 |
} |
1379 |
} |
1376 |
|
1380 |
|
1377 |
/** |
1381 |
/** |
1378 |
* Merges a range into the range list, coalescing ranges if possible. |
1382 |
* Merges a range into the range list, coalescing ranges if possible. |
1379 |
* @param min Minimum end of range |
1383 |
* @param min Minimum end of range |
Lines 1389-1395
Link Here
|
1389 |
{ |
1393 |
{ |
1390 |
return; |
1394 |
return; |
1391 |
} |
1395 |
} |
1392 |
|
1396 |
|
1393 |
// Min-max subsumes minRange[i]-maxRange[i] |
1397 |
// Min-max subsumes minRange[i]-maxRange[i] |
1394 |
else if (min <= minRange[i] && max >= maxRange[i]) |
1398 |
else if (min <= minRange[i] && max >= maxRange[i]) |
1395 |
{ |
1399 |
{ |
Lines 1397-1403
Link Here
|
1397 |
merge(min, max); |
1401 |
merge(min, max); |
1398 |
return; |
1402 |
return; |
1399 |
} |
1403 |
} |
1400 |
|
1404 |
|
1401 |
// Min is in the range, but max is outside |
1405 |
// Min is in the range, but max is outside |
1402 |
else if (min >= minRange[i] && min <= maxRange[i]) |
1406 |
else if (min >= minRange[i] && min <= maxRange[i]) |
1403 |
{ |
1407 |
{ |
Lines 1406-1412
Link Here
|
1406 |
merge(min, max); |
1410 |
merge(min, max); |
1407 |
return; |
1411 |
return; |
1408 |
} |
1412 |
} |
1409 |
|
1413 |
|
1410 |
// Max is in the range, but min is outside |
1414 |
// Max is in the range, but min is outside |
1411 |
else if (max >= minRange[i] && max <= maxRange[i]) |
1415 |
else if (max >= minRange[i] && max <= maxRange[i]) |
1412 |
{ |
1416 |
{ |
Lines 1416-1422
Link Here
|
1416 |
return; |
1420 |
return; |
1417 |
} |
1421 |
} |
1418 |
} |
1422 |
} |
1419 |
|
1423 |
|
1420 |
// Must not overlap any other ranges |
1424 |
// Must not overlap any other ranges |
1421 |
if (num >= size) |
1425 |
if (num >= size) |
1422 |
{ |
1426 |
{ |
Lines 1432-1438
Link Here
|
1432 |
maxRange[num] = max; |
1436 |
maxRange[num] = max; |
1433 |
num++; |
1437 |
num++; |
1434 |
} |
1438 |
} |
1435 |
|
1439 |
|
1436 |
/** |
1440 |
/** |
1437 |
* Removes a range by deleting or shrinking all other ranges |
1441 |
* Removes a range by deleting or shrinking all other ranges |
1438 |
* @param min Minimum end of range |
1442 |
* @param min Minimum end of range |
Lines 1450-1456
Link Here
|
1450 |
i--; |
1454 |
i--; |
1451 |
return; |
1455 |
return; |
1452 |
} |
1456 |
} |
1453 |
|
1457 |
|
1454 |
// min-max is subsumed by minRange[i]-maxRange[i] |
1458 |
// min-max is subsumed by minRange[i]-maxRange[i] |
1455 |
else if (min >= minRange[i] && max <= maxRange[i]) |
1459 |
else if (min >= minRange[i] && max <= maxRange[i]) |
1456 |
{ |
1460 |
{ |
Lines 1467-1480
Link Here
|
1467 |
} |
1471 |
} |
1468 |
return; |
1472 |
return; |
1469 |
} |
1473 |
} |
1470 |
|
1474 |
|
1471 |
// minRange is in the range, but maxRange is outside |
1475 |
// minRange is in the range, but maxRange is outside |
1472 |
else if (minRange[i] >= min && minRange[i] <= max) |
1476 |
else if (minRange[i] >= min && minRange[i] <= max) |
1473 |
{ |
1477 |
{ |
1474 |
minRange[i] = max + 1; |
1478 |
minRange[i] = max + 1; |
1475 |
return; |
1479 |
return; |
1476 |
} |
1480 |
} |
1477 |
|
1481 |
|
1478 |
// maxRange is in the range, but minRange is outside |
1482 |
// maxRange is in the range, but minRange is outside |
1479 |
else if (maxRange[i] >= min && maxRange[i] <= max) |
1483 |
else if (maxRange[i] >= min && maxRange[i] <= max) |
1480 |
{ |
1484 |
{ |
Lines 1483-1489
Link Here
|
1483 |
} |
1487 |
} |
1484 |
} |
1488 |
} |
1485 |
} |
1489 |
} |
1486 |
|
1490 |
|
1487 |
/** |
1491 |
/** |
1488 |
* Includes (or excludes) the range from min to max, inclusive. |
1492 |
* Includes (or excludes) the range from min to max, inclusive. |
1489 |
* @param min Minimum end of range |
1493 |
* @param min Minimum end of range |
Lines 1501-1507
Link Here
|
1501 |
remove(min, max); |
1505 |
remove(min, max); |
1502 |
} |
1506 |
} |
1503 |
} |
1507 |
} |
1504 |
|
1508 |
|
1505 |
/** |
1509 |
/** |
1506 |
* Includes a range with the same min and max |
1510 |
* Includes a range with the same min and max |
1507 |
* @param minmax Minimum and maximum end of range (inclusive) |
1511 |
* @param minmax Minimum and maximum end of range (inclusive) |