@@ -113,9 +113,28 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
113113 import pegged.introspection;
114114 import std.algorithm : canFind;
115115 GrammarInfo grammarInfo = grammarInfo(defAsParseTree.children[0 ]);
116- string [][string ] stoppers; // Keys are the rules that stop left-recursion and the
117- // values are arrays of strings containing the corresponding
118- // rules for which memoization needs to be blocked.
116+ string [] stoppers; // Names of the rules that stop left-recursion: the first
117+ // rule of every left-recursive cycle. While a stopper recurses,
118+ // memoization is blocked for all left-recursive rules.
119+
120+ /*
121+ I once considered that if two left-recursive cycles intersect, unbounded left-recursion
122+ would be prevented in both cycles if only the rule at their intersection were a stopper. Although
123+ true, this causes other problems, as documented in the "Mutual left-recursion" unittest below.
124+ Therefore, we simply make the first rule in every left-recursive cycle a stopper.
125+ Also, one might think that it suffices to prevent ordinary memoization in just the rules
126+ that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal
127+ would then fail to parse. So memoization for all left-recursive rules is disabled during
128+ left-recursion.
129+ */
130+ string [] allLeftRecursiveRules;
131+ foreach (cycle; grammarInfo.leftRecursiveCycles)
132+ foreach (rule; cycle)
133+ if (! canFind(allLeftRecursiveRules, rule))
134+ allLeftRecursiveRules ~= rule;
135+ foreach (cycle; grammarInfo.leftRecursiveCycles)
136+ if (! stoppers.canFind(cycle[0 ]))
137+ stoppers ~= cycle[0 ];
119138
120139 // Prints comment showing detected left-recursive cycles.
121140 string printLeftRecursiveCycles ()
@@ -136,163 +155,14 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
136155 {
137156 import std.array : join;
138157 string result;
139- foreach (stopper, rules; stoppers)
140- {
141- result ~= stopper ~ " : " ~ rules.join(" , " ) ~ " \n " ;
142- /* if (rules.length > 0)
143- result ~= rules[0];
144- foreach (rule; rules[1..$])
145- result ~= ", " ~ rule;
146- result ~= "\n";*/
147- }
158+ foreach (stopper; stoppers)
159+ result ~= stopper ~ " : " ~ allLeftRecursiveRules.join(" , " ) ~ " \n " ;
148160 return result.length > 0 ?
149161 " /** Rules that stop left-recursive cycles, followed by rules for which\n "
150162 ~ " * memoization is blocked during recursion:\n " ~ result ~ " */\n\n " : " " ;
151163 }
152- size_t [] handledCycleIndices;
153- // Detect interlocking cycles. Each cycle needs a different stopper.
154- foreach (i, cycle; grammarInfo.leftRecursiveCycles)
155- {
156- foreach (j, otherCycle; grammarInfo.leftRecursiveCycles[i+ 1 .. $])
157- {
158- foreach (rule; cycle)
159- {
160- if (otherCycle.canFind(rule))
161- {
162- // cycle and otherCycle intersect at rule.
163- // If a cycle has one single rule (direct left-recursion) then it needs to be a stopper.
164- if (cycle.length == 1 )
165- {
166- if (! handledCycleIndices.canFind(i))
167- {
168- if (! (rule in stoppers))
169- stoppers[rule] = [];
170- handledCycleIndices ~= i;
171- }
172- // The other cycle needs a different stopper.
173- assert (otherCycle.length > 1 );
174- if (! handledCycleIndices.canFind(j + i + 1 ))
175- {
176- foreach (r; otherCycle)
177- if (! (r in stoppers))
178- {
179- stoppers[r] = [];
180- foreach (rr; otherCycle)
181- if (rr != r)
182- stoppers[r] ~= rr;
183- handledCycleIndices ~= j + i + 1 ;
184- break ;
185- }
186- assert (handledCycleIndices.canFind(j + i + 1 ));
187- }
188- }
189- if (otherCycle.length == 1 )
190- {
191- if (! handledCycleIndices.canFind(j + i + 1 ))
192- {
193- if (! (rule in stoppers))
194- stoppers[rule] = [];
195- handledCycleIndices ~= j + i + 1 ;
196- }
197- // The other cycle needs a different stopper.
198- assert (cycle.length > 1 );
199- if (! handledCycleIndices.canFind(i))
200- {
201- foreach (r; cycle)
202- if (! (r in stoppers))
203- {
204- stoppers[r] = [];
205- foreach (rr; cycle)
206- if (rr != r)
207- stoppers[r] ~= rr;
208- handledCycleIndices ~= i;
209- break ;
210- }
211- assert (handledCycleIndices.canFind(i));
212- }
213- }
214- // At this point, if a cycle has not been handled yet, it has more than one rule.
215- if (! handledCycleIndices.canFind(i))
216- {
217- foreach (r; cycle)
218- if (! (r in stoppers))
219- {
220- stoppers[r] = [];
221- foreach (rr; cycle)
222- if (rr != r)
223- stoppers[r] ~= rr;
224- handledCycleIndices ~= i;
225- break ;
226- }
227- assert (handledCycleIndices.canFind(i));
228- }
229- if (! handledCycleIndices.canFind(j + i + 1 ))
230- {
231- foreach (r; otherCycle)
232- if (! (r in stoppers))
233- {
234- stoppers[r] = [];
235- foreach (rr; otherCycle)
236- if (rr != r)
237- stoppers[r] ~= rr;
238- handledCycleIndices ~= j + i + 1 ;
239- break ;
240- }
241- assert (handledCycleIndices.canFind(j + i + 1 ));
242- }
243- }
244- }
245- }
246- }
247- // Take the first node in remaining cycles as the stopper.
248- foreach (i, cycle; grammarInfo.leftRecursiveCycles)
249- {
250- if (handledCycleIndices.canFind(i))
251- continue ;
252- stoppers[cycle[0 ]] = cycle[1 .. $].dup ;
253- }
254164 // Analysis completed.
255165
256- // / Returns code to prevent memoization of incomplete matches during left-recursion through this rule.
257- string blockMemoForLeftRecursion (string stopper)
258- {
259- string result;
260- foreach (rule; stoppers[stopper] ~ stopper)
261- result ~= " blockMemo_" ~ rule ~ " _atPos ~= p.end;\n " ;
262- return result;
263- }
264-
265- // / Returns code that enables memoization when left-recursion has completed.
266- string unblockMemoForLeftRecursion (string stopper)
267- {
268- string result;
269- foreach (rule; stoppers[stopper] ~ stopper)
270- // TODO investigate if p.end is always the last element.
271- result ~= " assert(blockMemo_" ~ rule ~ " _atPos.canFind(p.end));\n "
272- ~ " remove(blockMemo_" ~ rule ~ " _atPos, countUntil(blockMemo_" ~ rule ~ " _atPos, p.end));\n " ;
273- return result;
274- }
275-
276- // / If $(D_PARAM name) is part of a left-recursive cycle and not a stopping rule, code is
277- // inserted to test for blocking and if blocked return with "$(D_PARAM code)(p)".
278- string maybeBlockedMemo (string name, string code)
279- {
280- assert (! stoppers.keys .canFind(name));
281- foreach (cycle; stoppers)
282- foreach (rule; cycle)
283- if (rule == name)
284- return
285- " if (blockMemo_" ~ name ~ " _atPos.canFind(p.end))\n "
286- ~ " return " ~ code ~ " (p);\n " ;
287- return " " ;
288- }
289-
290- // / Returns a Boolean expression whether $(D_PARAM rule) is not blocked.
291- string shouldMemoLeftRecursion (string rule)
292- {
293- return " !blockMemo_" ~ rule ~ " _atPos.canFind(p.end)" ;
294- }
295-
296166 string generateForgetMemo ()
297167 {
298168 string result;
@@ -318,25 +188,6 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
318188 {
319189 string result;
320190
321- // Variables holding the block-state.
322- string generateBlockers ()
323- {
324- string result;
325- string [] visited = [];
326- foreach (cycle; grammarInfo.leftRecursiveCycles)
327- foreach (rule; cycle)
328- if (! visited.canFind(rule))
329- {
330- visited ~= rule;
331- result ~= "
332- static size_t[] blockMemo_" ~ rule ~ " _atPos;" ;
333- }
334- if (result.length > 0 )
335- return "
336- import std.algorithm: canFind, countUntil, remove;" ~ result;
337- return result;
338- }
339-
340191 switch (p.name)
341192 {
342193 case " Pegged" :
@@ -351,9 +202,9 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
351202 result =
352203" struct Generic" ~ shortGrammarName ~ " (TParseTree)
353204{
354- import std.functional : toDelegate;
205+ import std.functional : toDelegate;
355206 import pegged.dynamic.grammar;
356- static import pegged.peg;
207+ static import pegged.peg;
357208 struct " ~ grammarName ~ " \n {
358209 enum name = \" " ~ shortGrammarName ~ " \" ;
359210 static ParseTree delegate(ParseTree)[string] before;
@@ -364,7 +215,10 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
364215 result ~= "
365216 import std.typecons:Tuple, tuple;
366217 static TParseTree[Tuple!(string, size_t)] memo;" ;
367- result ~= generateBlockers();
218+ if (grammarInfo.leftRecursiveCycles.length > 0 )
219+ result ~= "
220+ import std.algorithm: canFind, countUntil, remove;
221+ static size_t[] blockMemoAtPos;" ;
368222 }
369223
370224 result ~= "
@@ -603,13 +457,12 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
603457 string ctfeCode = " pegged.peg.defined!(" ~ code ~ " , \" " ~ propagatedName ~ " ." ~ innerName[1 .. $- 1 ] ~ " \" )" ;
604458 code = " hooked!(pegged.peg.defined!(" ~ code ~ " , \" " ~ propagatedName ~ " ." ~ innerName[1 .. $- 1 ] ~ " \" ), \" " ~ hookedName ~ " \" )" ;
605459
606- import std.algorithm.searching : canFind;
607460 if (withMemo == Memoization.no)
608461 result ~= " static TParseTree " ~ shortName ~ " (TParseTree p)\n "
609462 ~ " {\n "
610463 ~ " if(__ctfe)\n "
611464 ~ " {\n "
612- ~ (stoppers.keys . canFind(shortName) ?
465+ ~ (stoppers.canFind(shortName) ?
613466 " assert(false, \" " ~ shortName ~ " is left-recursive, which is not supported "
614467 ~ " at compile-time. Consider using asModule().\" );\n "
615468 :
@@ -618,7 +471,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
618471 ~ " }\n "
619472 ~ " else\n "
620473 ~ " {\n "
621- ~ (stoppers.keys . canFind(shortName) ?
474+ ~ (stoppers.canFind(shortName) ?
622475 // This rule needs to prevent infinite left-recursion.
623476 " static TParseTree[size_t /*position*/] seed;\n "
624477 ~ " if (auto s = p.end in seed)\n "
@@ -663,7 +516,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
663516 ~ " {\n "
664517 ~ " if(__ctfe)\n "
665518 ~ " {\n "
666- ~ (stoppers.keys . canFind(shortName) ?
519+ ~ (stoppers.canFind(shortName) ?
667520 " assert(false, \" " ~ shortName ~ " is left-recursive, which is not supported "
668521 ~ " at compile-time. Consider using asModule().\" );\n "
669522 :
@@ -672,17 +525,17 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
672525 ~ " }\n "
673526 ~ " else\n "
674527 ~ " {\n "
675- ~ (stoppers.keys . canFind(shortName) ?
528+ ~ (stoppers.canFind(shortName) ?
676529 // This rule needs to prevent infinite left-recursion.
677530 " static TParseTree[size_t /*position*/] seed;\n "
678531 ~ " if (auto s = p.end in seed)\n "
679532 ~ " return *s;\n "
680- ~ " if (" ~ shouldMemoLeftRecursion(shortName) ~ " )\n "
533+ ~ " if (!blockMemoAtPos.canFind(p.end) )\n "
681534 ~ " if (auto m = tuple(" ~ innerName ~ " , p.end) in memo)\n "
682535 ~ " return *m;\n "
683536 ~ " auto current = fail(p);\n "
684537 ~ " seed[p.end] = current;\n "
685- ~ blockMemoForLeftRecursion(shortName)
538+ ~ " blockMemoAtPos ~= p.end; \n "
686539 ~ " while (true)\n "
687540 ~ " {\n "
688541 ~ " auto result = " ~ code ~ " (p);\n "
@@ -704,14 +557,20 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
704557 // care of by memo. Note that p.end remains constant for the course of recursion,
705558 // and the length of seed only grows when nested recursion occurs.
706559 ~ " seed.remove(p.end);\n "
707- ~ unblockMemoForLeftRecursion(shortName)
560+ // TODO investigate if p.end is always the last element of blockMemoAtPos.
561+ ~ " assert(blockMemoAtPos.canFind(p.end));\n "
562+ ~ " blockMemoAtPos = blockMemoAtPos.remove(countUntil(blockMemoAtPos, p.end));\n "
708563 ~ " memo[tuple(" ~ innerName ~ " , p.end)] = current;\n "
709564 ~ " return current;\n "
710565 ~ " }\n "
711566 ~ " }\n "
712567 :
713568 // Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle.
714- maybeBlockedMemo(shortName, code)
569+ (allLeftRecursiveRules.canFind(shortName) ?
570+ " if (blockMemoAtPos.canFind(p.end))\n "
571+ ~ " return " ~ code ~ " (p);\n "
572+ : " "
573+ )
715574 ~ " if (auto m = tuple(" ~ innerName ~ " , p.end) in memo)\n "
716575 ~ " return *m;\n "
717576 ~ " else\n "
@@ -3016,6 +2875,16 @@ unittest // Proper blocking of memoization
30162875// Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf
30172876unittest // Mutual left-recursion
30182877{
2878+ /* A note about stoppers:
2879+ Because P is at the intersection of the left-recursive cycles P -> P and L -> P -> L, it should
2880+ suffice to make only P a stopper to stop unbounded left-recursion. And so it does. However,
2881+ stoppers parse greedily: they always consume the maximum of input. So below, if only P is a stopper,
2882+ at some point P parses the complete input. Then L fails because it cannot append ".x", and then M fails.
2883+ If both are made stoppers then M succeeds. That is because P will try L when P '(n)' no longer
2884+ consumes input, which will appear as a left-recursion to L if it is a stopper; because of that,
2885+ L will have a chance to succeed on the full input, which it recorded in its seed for the previous
2886+ recursion.
2887+ */
30192888 enum LeftGrammar = `
30202889 Left:
30212890 M <- L eoi
0 commit comments