Skip to content

Commit 43a0dea

Browse files
committed
Comments about stopper rules and blocked memoization.
1 parent d4c4287 commit 43a0dea

File tree

1 file changed

+25
-6
lines changed

1 file changed

+25
-6
lines changed

pegged/grammar.d

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,17 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
142142
"/** Rules that stop left-recursive cycles, followed by rules for which\n"
143143
~ " * memoization is blocked during recursion:\n" ~ result ~ "*/\n\n" : "";
144144
}
145+
146+
/*
147+
I once considered that if two left-recursive cycles intersect, unbounded left-recursion
148+
would be prevented in both cycles if only the intersection rule were made a stopper. Although
149+
true, it causes other problems, as documented in the "Mutual left-recursion" unittest below.
150+
Therefore, we simply make the first rule in every left-recursive cycle a stopper.
151+
Also, one might think that it suffices to prevent ordinary memoization in just the rules
152+
that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal
153+
would fail to parse. So memoization for all left-recursive rules is disabled during
154+
left-recursion.
155+
*/
145156
string[] allLeftRecursiveRules;
146157
foreach (cycle; grammarInfo.leftRecursiveCycles)
147158
foreach (rule; cycle)
@@ -155,21 +166,19 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
155166
string blockMemoForLeftRecursion(string stopper)
156167
{
157168
string result;
158-
foreach (rule; stoppers[stopper]) {
169+
foreach (rule; stoppers[stopper])
159170
result ~= " blockMemo_" ~ rule ~ "_atPos ~= p.end;\n";
160-
}
161171
return result;
162172
}
163173

164174
/// Returns code that enables memoization when left-recursion has completed.
165175
string unblockMemoForLeftRecursion(string stopper)
166176
{
167177
string result;
168-
foreach (rule; stoppers[stopper]) {
178+
foreach (rule; stoppers[stopper])
169179
// TODO investigate if p.end is always the last element.
170180
result ~= " assert(blockMemo_" ~ rule ~ "_atPos.canFind(p.end));\n"
171181
~ " blockMemo_" ~ rule ~ "_atPos = remove(blockMemo_" ~ rule ~ "_atPos, countUntil(blockMemo_" ~ rule ~ "_atPos, p.end));\n";
172-
}
173182
return result;
174183
}
175184

@@ -251,9 +260,9 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
251260
result =
252261
"struct Generic" ~ shortGrammarName ~ "(TParseTree)
253262
{
254-
import std.functional : toDelegate;
263+
import std.functional : toDelegate;
255264
import pegged.dynamic.grammar;
256-
static import pegged.peg;
265+
static import pegged.peg;
257266
struct " ~ grammarName ~ "\n {
258267
enum name = \"" ~ shortGrammarName ~ "\";
259268
static ParseTree delegate(ParseTree)[string] before;
@@ -2915,6 +2924,16 @@ unittest // Proper blocking of memoization
29152924
// Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf
29162925
unittest // Mutual left-recursion
29172926
{
2927+
/* A thing about stoppers:
2928+
Because P is at the intersection of left-recursive cycles P -> P and L -> P -> L, it should
2929+
suffice to make only P a stopper to stop unbounded left-recursion. And so it does. But,
2930+
stoppers parse greedily: they always consume the maximum amount of input. So below, if only P is a stopper,
2931+
at some point P parses the complete input. Then L fails because it cannot append ".x", then M fails.
2932+
If both P and L are made stoppers, then M succeeds. That is because P will try L when P '(n)' no longer
2933+
consumes input, which will appear as a left-recursion to L if it is a stopper and because of that
2934+
it will have a chance to succeed on the full input which it recorded in its seed for the previous
2935+
recursion.
2936+
*/
29182937
enum LeftGrammar = `
29192938
Left:
29202939
M <- L eoi

0 commit comments

Comments
 (0)