@@ -199,6 +199,7 @@ def done(self, *nodes):
                     self._ready_nodes.append(successor)
             self._nfinished += 1
 
+    # See note "On Finding Cycles" at the bottom.
     def _find_cycle(self):
         n2i = self._node2info
         stack = []
@@ -212,8 +213,6 @@ def _find_cycle(self):
 
             while True:
                 if node in seen:
-                    # If we have seen already the node and is in the
-                    # current stack we have found a cycle.
                     if node in node2stacki:
                         return stack[node2stacki[node] :] + [node]
                     # else go on to get next successor
@@ -228,11 +227,15 @@ def _find_cycle(self):
                 while stack:
                     try:
                         node = itstack[-1]()
-                        break
+                        break  # resume at top of "while True:"
                     except StopIteration:
+                        # no more successors; pop the stack
+                        # and continue looking up
                         del node2stacki[stack.pop()]
                         itstack.pop()
                 else:
+                    # stack is empty; look for a fresh node to
+                    # start over from (a node not yet in seen)
                     break
         return None
 
@@ -252,3 +255,53 @@ def static_order(self):
                 self.done(*node_group)
 
     __class_getitem__ = classmethod(GenericAlias)
+
+# On Finding Cycles
+# -----------------
+# There is a total order (at least one) if and only if the graph is
+# acyclic.
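+#
+# For example, via the public API (an illustrative sketch; `prepare()`
+# is what calls `_find_cycle()`, and the detected cycle is the second
+# element of the CycleError's `args`):
+#
+#     from graphlib import TopologicalSorter
+#
+#     ts = TopologicalSorter({"b": {"a"}, "c": {"b"}})
+#     list(ts.static_order())  # ['a', 'b', 'c'] - acyclic, so orderable
+#
+#     ts = TopologicalSorter({"a": {"b"}, "b": {"a"}})
+#     ts.prepare()  # raises CycleError; the cycle is in its args[1]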
+#
+# When it is cyclic, "there's a cycle - somewhere!" isn't very helpful.
+# In theory, it would be most helpful to partition the graph into
+# strongly connected components (SCCs) and display those with more than
+# one node. Then all cycles could easily be identified "by eyeball".
+#
+# That's a lot of work, though, and we can get most of the benefit much
+# more easily just by showing a single specific cycle.
+#
+# Finding a cycle is most natural via a breadth-first search (BFS),
+# which can easily be arranged to find a shortest-possible cycle. But
+# memory burden can be high, because every path-in-progress has to keep
+# its own idea of what "the path" is so far.
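+#
+# A sketch of that approach (a hypothetical helper, not part of this
+# module; `graph` maps a node to an iterable of its successors). Note
+# that every queue entry carries its own full copy of a path:
+#
+#     from collections import deque
+#
+#     def bfs_find_cycle(graph):
+#         for start in graph:
+#             queue = deque([[start]])
+#             while queue:
+#                 path = queue.popleft()
+#                 for succ in graph.get(path[-1], ()):
+#                     if succ == start:
+#                         return path + [succ]  # shortest cycle via start
+#                     if succ not in path:
+#                         queue.append(path + [succ])  # fresh copy
+#         return None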
+#
+# Depth-first search (DFS) is much easier on RAM, only requiring keeping
+# track of _the_ path from the starting node to the current node at the
+# current recursion level. But there may be any number of nodes, and so
+# there's no bound on recursion depth short of the total number of
+# nodes.
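+#
+# A recursive version would look roughly like this (an illustrative
+# sketch with hypothetical names; each level costs a Python stack
+# frame, which is why the real code above is iterative instead):
+#
+#     def dfs_find_cycle(graph, node, path, on_path, seen):
+#         if node in on_path:  # back to a node on the path: cycle!
+#             return path[on_path[node]:] + [node]
+#         if node in seen:  # already fully explored; nothing new here
+#             return None
+#         seen.add(node)
+#         on_path[node] = len(path)
+#         path.append(node)
+#         for succ in graph.get(node, ()):
+#             cycle = dfs_find_cycle(graph, succ, path, on_path, seen)
+#             if cycle is not None:
+#                 return cycle
+#         del on_path[path.pop()]
+#         return None
+#
+#     (called as dfs_find_cycle(g, n, [], {}, set()) for each fresh n)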
+#
+# So we use an iterative version of DFS, keeping an explicit list
+# (`stack`) of the path so far. A parallel stack (`itstack`) holds the
+# bound `__next__` method of an iterator over the current level's
+# node's successors, so when backtracking to a shallower level we can
+# just call that to get the node's next successor. This is state that a
+# recursive version would implicitly store in a `for` loop's internals.
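+#
+# The trick in miniature (illustrative):
+#
+#     next_succ = iter("abc").__next__
+#     next_succ()  # 'a'
+#     next_succ()  # 'b' - the iterator remembers where it left off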
+#
+# `seen` is a set recording which nodes have already been, at some time,
+# pushed on the stack. If a node has been pushed on the stack, DFS will
+# find any cycle it's part of, so there's no need to ever look at it
+# again.
+#
+# Finally, `node2stacki` maps a node to its index on the current stack,
+# for and only for nodes currently _on_ the stack. If a successor to be
+# pushed on the stack is in that dict, the node is already on the path,
+# at that index. The cycle is then `stack[that_index :] + [node]`.
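+#
+# Concretely (a made-up trace): if `stack` is ['a', 'b', 'c'] and the
+# next successor of 'c' is 'b', then node2stacki['b'] is 1 and the
+# cycle returned is stack[1:] + ['b'], i.e. ['b', 'c', 'b'].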
+#
+# As is often the case when removing recursion, the control flow looks a
+# bit off. The "while stack:" loop here rarely actually loops - it's
+# only looking to go "up the stack" until finding a level that has
+# another successor to consider, emulating a chain of returns in a
+# recursive version.
+#
+# Worst-case time is linear in the number of nodes plus the number of
+# edges. Worst-case memory burden is linear in the number of nodes.