@@ -18,7 +18,7 @@ Writing tests the long way is preferred here, as it will avoid the circular
1818dependency.
1919*/
2020
21- import std.algorithm : equal, map, startsWith;
21+ import std.algorithm : equal, map, startsWith, max, countUntil, maxElement, filter ;
2222import std.uni : isAlpha, icmp;
2323import std.array ;
2424import std.conv ;
@@ -248,6 +248,9 @@ struct ParseTree
248248
249249 ParseTree[] children; /// The sub-trees created by sub-rules parsing.
250250
251+ size_t failEnd; // The furthest this tree could match the input (including !successful rules).
252+ ParseTree[] failedChild; /// The !successful child that could still be partially parsed.
253+
251254 /**
252255 Basic toString for easy pretty-printing.
253256 */
@@ -343,6 +346,8 @@ struct ParseTree
343346 result.input = input;
344347 result.begin = begin;
345348 result.end = end;
349+ result.failEnd = failEnd;
350+ result.failedChild = map!(p => p.dup)(failedChild).array();
346351 result.children = map!(p => p.dup)(children).array();
347352 return result;
348353 }
@@ -716,11 +721,16 @@ template literal(string s)
716721 ParseTree literal(ParseTree p)
717722 {
718723 enum lit = "\"" ~ s ~ "\"";
724+
719725 if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s)
720726 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length);
721- else
722- return ParseTree(name, false, [lit], p.input, p.end, p.end);
723- }
727+ else {
728+ import std.algorithm : commonPrefix;
729+ import std.utf : byCodeUnit;
730+ auto prefix = p.input[p.end..$].byCodeUnit.commonPrefix(s.byCodeUnit);
731+ return ParseTree(name, false, [lit], p.input, p.end, p.end, null, p.end + prefix.length);
732+ }
733+ }
724734
725735 ParseTree literal(string input)
726736 {
@@ -1239,7 +1249,6 @@ and that the second subrule ('[a-z]') failed at position 3 (so, on '1').
12391249*/
12401250template and(rules...) if (rules.length > 0)
12411251{
1242-
12431252 string ctfeGetNameAnd()
12441253 {
12451254 string name = "and!(";
@@ -1262,7 +1271,8 @@ template and(rules...) if (rules.length > 0)
12621271 //&& !node.name.startsWith("drop!(")
12631272 && node.matches !is null
12641273 //&& node.begin != node.end
1265- );
1274+ )
1275+ || (node.failEnd >= node.end);
12661276 }
12671277
12681278 version (tracer)
@@ -1281,6 +1291,7 @@ template and(rules...) if (rules.length > 0)
12811291 }
12821292 ParseTree temp = r(result);
12831293 result.end = temp.end;
1294+ result.failEnd = max(result.failEnd, temp.failEnd);
12841295 if (temp.successful)
12851296 {
12861297 if (keepNode(temp))
@@ -1296,9 +1307,21 @@ template and(rules...) if (rules.length > 0)
12961307 }
12971308 else
12981309 {
1299- result.children ~= temp;// add the failed node, to indicate which failed
1300- if (temp.matches.length > 0)
1301- result.matches ~= temp.matches[$-1];
1310+ auto firstLongestFailedMatch = result.children.countUntil!(c => c.failEnd > temp.end);
1311+ if (firstLongestFailedMatch == -1) {
1312+ result.children ~= temp;// add the failed node, to indicate which failed
1313+ if (temp.matches.length > 0)
1314+ result.matches ~= temp.matches[$-1];
1315+ } else {
1316+ // don't add the failed node because a previous one already failed further back
1317+ result.children = result.children[0 .. firstLongestFailedMatch+1]; // discard any intermediate correct nodes
1318+ // This current 'and' rule has failed parsing and there is a successful child
1319+ // that had a longer failing match. We now want to revisit that child and modify it
1320+ // so that it is no longer successful and we want to move its failedChild into its children.
1321+ failedChildFixup(result.children[firstLongestFailedMatch], result.children[firstLongestFailedMatch].failEnd);
1322+ }
1323+ result.end = result.children.map!(c => c.end).maxElement;
1324+ result.failEnd = result.children.map!(c => c.failEnd).maxElement;
13021325 version (tracer)
13031326 {
13041327 if (shouldTrace(getName!(r)(), p))
@@ -1331,6 +1354,35 @@ template and(rules...) if (rules.length > 0)
13311354 {
13321355 return name;
13331356 }
1357+
/**
Rewrites a node of a failed 'and' so that a longer failing alternative it
remembered in `failedChild` becomes part of its regular children.

If `p` has a `failedChild`, its first entry is appended to `p.children`,
`p.failedChild` is cleared, `p` is marked unsuccessful, `p.end` takes the value
`p.failEnd` had on entry, and `p.failEnd` is recomputed as the largest `failEnd`
among `p.children`.

Otherwise the search descends into every child whose `failEnd` equals the
`failEnd` argument. Each time such a child is rewritten, `p` is marked
unsuccessful, `p.end` is set to that child's `end` and `p.failEnd` is recomputed
from `p.children`, so the intermediate nodes reflect the new state.

Params:
    p = the node to rewrite in place.
    failEnd = the failure position used to select which children are searched.

Returns: true if `p` or one of its descendants was rewritten, false otherwise.
*/
bool failedChildFixup(ref ParseTree p, size_t failEnd)
{
    if (p.failedChild.length == 0)
    {
        // Nothing stored on this node: look for the stored alternative further down.
        bool rewritten = false;
        foreach (ref kid; p.children)
        {
            // Children that do not reach exactly `failEnd` are left untouched.
            if (kid.failEnd == failEnd && failedChildFixup(kid, failEnd))
            {
                p.end = kid.end;
                p.successful = false;
                p.failEnd = p.children.map!(n => n.failEnd).maxElement();
                rewritten = true;
            }
        }
        return rewritten;
    }

    // This node holds the longer failing alternative: move it into the children.
    p.children ~= p.failedChild[0];
    p.failedChild = [];
    p.successful = false;
    p.end = p.failEnd; // uses the failEnd recorded before it is recomputed below
    p.failEnd = p.children.map!(n => n.failEnd).maxElement();
    return true;
}
13341386}
13351387
13361388unittest // 'and' unit test
@@ -1403,6 +1455,62 @@ unittest // 'and' unit test
14031455 , "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure).");
14041456}
14051457
version (unittest)
{
    /// Test helper: follows the last child of each node downwards and returns
    /// a copy of the first node found that has no children.
    static ParseTree getError(ref ParseTree p)
    {
        return p.children.length > 0 ? getError(p.children[$-1]) : p;
    }
}
1465+
unittest // 'and' unit test with zeroOrMore and longest failing match
{
    // Grammar under test: ("abc" "def")* "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(zeroOrMore!(and!(Abc, Def)), Ghi);

    // The input is only "abc".
    ParseTree start = ParseTree("",false,[], "abc");
    ParseTree parsed = Grammar(start);

    assert(!parsed.successful);
    // The node reached by following last children must report "def" as its last match.
    assert(getError(parsed).matches[$-1] == "\"def\"", "and!(zeroOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");
    assert(parsed.matches == []);
}
1481+
unittest // 'and' unit test with option and longest failing match
{
    // Grammar under test: ("abc" "def")? "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(option!(and!(Abc, Def)), Ghi);

    // The input is only "abc".
    ParseTree start = ParseTree("",false,[], "abc");
    ParseTree parsed = Grammar(start);

    assert(!parsed.successful);
    // The node reached by following last children must report "def" as its last match.
    assert(getError(parsed).matches[$-1] == "\"def\"", "and!(option!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");
    assert(parsed.matches == []);
}
1497+
unittest // 'and' unit test with oneOrMore and longest failing match
{
    // Grammar under test: ("abc" "def")+ "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(oneOrMore!(and!(Abc, Def)), Ghi);

    // The input is "abcdef" followed by a lone "abc".
    ParseTree start = ParseTree("",false,[], "abcdefabc");
    ParseTree parsed = Grammar(start);

    assert(!parsed.successful);
    // The node reached by following last children must report "def" as its last match.
    assert(getError(parsed).matches[$-1] == "\"def\"", "and!(oneOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abcdefabc\"");
    // The matches of the first "abc" "def" pair are kept.
    assert(parsed.matches == ["abc", "def"]);
}
1513+
14061514template wrapAround(alias before, alias target, alias after)
14071515{
14081516 ParseTree wrapAround(ParseTree p)
@@ -1524,6 +1632,11 @@ template or(rules...) if (rules.length > 0)
15241632 {
15251633 temp.children = [temp];
15261634 temp.name = name;
1635+ // if there is a child that failed but parsed more
1636+ if (longestFail.failEnd > temp.end) {
1637+ temp.failEnd = longestFail.failEnd;
1638+ temp.failedChild = [longestFail];
1639+ }
15271640 version (tracer)
15281641 {
15291642 if (shouldTrace(getName!(r)(), p))
@@ -1543,15 +1656,15 @@ template or(rules...) if (rules.length > 0)
15431656 failedLength[i] = temp.end;
15441657 if (temp.end >= longestFail.end)
15451658 {
1659+ if (temp.end == longestFail.end)
1660+ errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1661+ else
1662+ errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
15461663 maxFailedLength = temp.end;
15471664 longestFail = temp;
15481665 names[i] = errName;
15491666 results[i] = temp;
15501667
1551- if (temp.end == longestFail.end)
1552- errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1553- else
1554- errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
15551668 }
15561669 // Else, this error parsed less input than another one: we discard it.
15571670 }
@@ -1587,9 +1700,8 @@ template or(rules...) if (rules.length > 0)
15871700 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] :
15881701 longestFail.matches[0..$-1] // discarding longestFail error message
15891702 ~ [orErrorString]; // and replacing it by the new, concatenated one.
1590- longestFail.name = name;
1591- longestFail.begin = p.end;
1592- return longestFail;
1703+ auto children = results[].filter!(r => max(r.end, r.failEnd) >= maxFailedLength).array();
1704+ return ParseTree(name, false, longestFail.matches, p.input, p.end, longestFail.end, children, children.map!(c => c.failEnd).maxElement);
15931705 }
15941706
15951707 ParseTree or(string input)
@@ -2166,13 +2278,19 @@ template zeroOrMore(alias r)
21662278 result.matches ~= temp.matches;
21672279 result.children ~= temp;
21682280 result.end = temp.end;
2281+ result.failEnd = max(result.failEnd, temp.failEnd);
21692282 version (tracer)
21702283 {
21712284 if (shouldTrace(getName!(r)(), p))
21722285 trace(traceMsg(result, name, getName!(r)()));
21732286 }
21742287 temp = r(result);
21752288 }
2289+ auto maxFail = max(temp.failEnd, temp.end);
2290+ if (maxFail > result.failEnd && maxFail > result.end) {
2291+ result.failedChild = [temp];
2292+ result.failEnd = maxFail;
2293+ }
21762294 result.successful = true;
21772295 version (tracer)
21782296 {
@@ -2328,13 +2446,19 @@ template oneOrMore(alias r)
23282446 result.matches ~= temp.matches;
23292447 result.children ~= temp;
23302448 result.end = temp.end;
2449+ result.failEnd = max(result.failEnd, temp.failEnd);
23312450 version (tracer)
23322451 {
23332452 if (shouldTrace(getName!(r)(), p))
23342453 trace(traceMsg(result, name, getName!(r)()));
23352454 }
23362455 temp = r(result);
23372456 }
2457+ auto maxFail = max(temp.failEnd, temp.end);
2458+ if (maxFail > result.failEnd && maxFail > result.end) {
2459+ result.failedChild = [temp];
2460+ result.failEnd = maxFail;
2461+ }
23382462 result.successful = true;
23392463 }
23402464 version (tracer)
@@ -2451,9 +2575,9 @@ template option(alias r)
24512575 }
24522576 ParseTree result = r(p);
24532577 if (result.successful)
2454- return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result]);
2578+ return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result], result.failEnd );
24552579 else
2456- return ParseTree(name, true, [], p.input, p.end, p.end, null);
2580+ return ParseTree(name, true, [], p.input, p.end, p.end, null, max(result.end,result.failEnd), [result] );
24572581 }
24582582
24592583 ParseTree option(string input)
@@ -3474,15 +3598,19 @@ mixin template decimateTree()
34743598 {
34753599 if(p.children.length == 0) return p;
34763600
3601+ bool parseFailed = !p.successful;
3602+
34773603 ParseTree[] filterChildren(ParseTree pt)
34783604 {
34793605 ParseTree[] result;
34803606 foreach(child; pt.children)
34813607 {
34823608 import std.algorithm : startsWith;
34833609
3484- if ( (isRule(child.name) && child.matches.length != 0)
3485- || !child.successful && child.children.length == 0)
3610+ if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed))
3611+ || (!child.successful && child.children.length == 0)
3612+ || (!child.successful && child.name.startsWith(" or! " ) && child.children.length > 1)
3613+ || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0))
34863614 {
34873615 child.children = filterChildren(child);
34883616 result ~= child;
@@ -3499,6 +3627,37 @@ mixin template decimateTree()
34993627 }
35003628 return result;
35013629 }
/*
Walks the children of `pt` depth-first. For every child that matches neither
the retention condition below (the same one filterChildren tests) nor the
'keep' name prefix, and that carries `failedChild` entries, those entries are
moved up into `pt.failedChild`. Afterwards every entry of `pt.failedChild` is
processed recursively and has its own children run through filterChildren.
*/
void filterFailedChildren(ref ParseTree pt)
{
    import std.algorithm : startsWith;

    foreach (ref node; pt.children)
    {
        // Handle the subtree first so anything hoisted below is already processed.
        filterFailedChildren(node);

        immutable bool retained =
               (isRule(node.name) && (node.matches.length != 0 || parseFailed))
            || (!node.successful && node.children.length == 0)
            || (!node.successful && node.name.startsWith(" or! " ) && node.children.length > 1)
            || (!pt.successful && node.successful && node.children.length == 0 && node.failedChild.length > 0);

        // Retained nodes and 'keep' nodes hold on to their failedChild;
        // any other node hands it over to its parent.
        if (!retained && !node.name.startsWith(" keep! (" ) && node.failedChild.length > 0)
        {
            pt.failedChild ~= node.failedChild;
            node.failedChild = [];
        }
    }

    foreach (ref failed; pt.failedChild)
    {
        filterFailedChildren(failed);
        failed.children = filterChildren(failed);
    }
}
3659+ if (!p.successful)
3660+ filterFailedChildren(p);
35023661 p.children = filterChildren(p);
35033662 return p;
35043663 }
0 commit comments