Last active
April 14, 2021 16:24
-
-
Save saolof/7b5d26a41e6a34ff2b3e76d3fc5541da to your computer and use it in GitHub Desktop.
Various modern implementations of Tarjans algorithm, for a PR to LightGraphs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using LightGraphs | |
using SimpleTraits | |
using BenchmarkTools | |
using GraphPlot | |
@traitfn function strongly_connected_components_2(g::AG::IsDirected) where {T <: Integer, AG <: AbstractGraph{T}} | |
if iszero(nv(g)) return Vector{Vector{T}}() end | |
strongly_connected_components_tarjan(g,infer_nb_iterstate_type(g)) | |
end | |
# In recursive form, Tarjans algorithm has a recursive call inside a for loop. | |
# To save the loop state of each recursive step in a stack efficiently, | |
# we need to infer the type of its state (which should almost always be an int). | |
infer_nb_iterstate_type(g::LightGraphs.SimpleGraphs.AbstractSimpleGraph) = Int | |
is_unvisited(data::AbstractVector,v::Integer) = iszero(data[v]) | |
function infer_nb_iterstate_type(g::AbstractGraph{T}) where {T} | |
destructure_type(x) = Any | |
destructure_type(x::Type{Union{Nothing,Tuple{A,B}}}) where {A,B} = B | |
# If no specific dispatch is given, we peek at the first vertex and use Base.Iterator magic to try infering the type. | |
destructure_type(Base.Iterators.approx_iter_type(typeof(outneighbors(g,one(T))))) | |
end | |
# Vertex size threshold below which it isn't worth keeping the DFS iteration state. | |
is_large_vertex(g,v) = length(outneighbors(g,v)) >= 1024 | |
# The key idea behind any variation on Tarjan's algorithm is to use DFS and pop off found components. | |
# Whenever we are forced to backtrack, we are in a bottom cycle of the remaining graph, | |
# which we accumulate in a stack while backtracking, until we reach a local root. | |
# A local root is a vertex from which we cannot reach any node that was visited earlier by DFS. | |
# As such, when we have backtracked to it, we may pop off the contents the stack as a strongly connected component. | |
function strongly_connected_components_tarjan(g::AG, nb_iter_statetype::Type{S}) where {T <: Integer, AG <: AbstractGraph{T}, S} | |
nvg = nv(g) | |
one_count = one(T) | |
count = nvg # (Counting downwards) Visitation order for the branch being explored. Backtracks when we pop an scc. | |
component_count = one_count # Index of the current component being discovered. | |
# Invariant 1: count is always smaller than component_count. | |
# Invariant 2: if rindex[v] < component_count, then v is in components[rindex[v]]. | |
# This trivially lets us tell if a vertex belongs to a previously discovered scc without any extra bits, | |
# just inequalities that combine naturally with other checks. | |
is_component_root = Vector{Bool}(undef,nvg) # Fields are set when tracing and read when backtracking, so can be initialized undef. | |
rindex = zeros(T,nvg) | |
components = Vector{Vector{T}}() # maintains a list of scc (order is not guaranteed in API) | |
stack = Vector{T}() # while backtracking, stores vertices which have been discovered and not yet assigned to any component | |
dfs_stack = Vector{T}() | |
largev_iterstate_stack = Vector{Tuple{T,S}}() # For large vertexes we push the iteration state into a stack so we may resume it. | |
# adding this last stack fixes the O(|E|^2) performance bug that could previously be seen in large star graphs. | |
# The Tuples come from Julia's iteration protocol, and the code is structured so that we never push a Nothing into thise last stack. | |
@inbounds for s in vertices(g) | |
if is_unvisited(rindex, s) | |
rindex[s] = count | |
is_component_root[s] = true | |
count -= one_count | |
# start dfs from 's' | |
push!(dfs_stack, s) | |
if is_large_vertex(g, s) | |
push!(largev_iterstate_stack, iterate(outneighbors(g, s))) | |
end | |
@inbounds while !isempty(dfs_stack) | |
v = dfs_stack[end] #end is the most recently added item | |
outn = outneighbors(g, v) | |
v_is_large = is_large_vertex(g, v) | |
next = v_is_large ? pop!(largev_iterstate_stack) : iterate(outn) | |
while next !== nothing | |
(v_neighbor, state) = next | |
if is_unvisited(rindex, v_neighbor) | |
break | |
#GOTO A: push v_neighbor onto DFS stack and continue DFS | |
# Note: This is no longer quadratic for (very large) tournament graphs or star graphs, | |
# as we save the iteration state in largev_iterstate_stack for large vertices. | |
# The loop is tight so not saving the state still benchmarks well unless the vertex orders are large enough to make quadratic growth kick in. | |
elseif (rindex[v_neighbor] > rindex[v]) | |
rindex[v] = rindex[v_neighbor] | |
is_component_root[v] = false | |
end | |
next = iterate(outn, state) | |
end | |
if isnothing(next) # Natural loop end. | |
# All out neighbors already visited or no out neighbors | |
# we have fully explored the DFS tree from v. | |
# time to start popping. | |
popped = pop!(dfs_stack) | |
if is_component_root[popped] # Found an SCC rooted at popped which is a bottom cycle in remaining graph. | |
component = T[popped] | |
count += one_count # We also backtrack the count to reset it to what it would be if the component were never in the graph. | |
while !isempty(stack) && (rindex[popped] >= rindex[stack[end]]) # Keep popping its children from the backtracking stack. | |
newpopped = pop!(stack) | |
rindex[newpopped] = component_count # Bigger than the value of anything unexplored. | |
push!(component, newpopped) # popped has been assigned a component, so we will never see it again. | |
count += one_count | |
end | |
rindex[popped] = component_count | |
component_count += one_count | |
push!(components, component) | |
else # Invariant: the DFS stack can never be empty in this second branch where popped is not a root. | |
if (rindex[popped] > rindex[dfs_stack[end]]) | |
rindex[dfs_stack[end]] = rindex[popped] | |
is_component_root[dfs_stack[end]] = false | |
end | |
# Because we only push to stack when backtracking, it gets filled up less than in Tarjan's original algorithm. | |
push!(stack, popped) # For DAG inputs, the stack variable never gets touched at all. | |
end | |
else #LABEL A | |
# add unvisited neighbor to dfs | |
(u, state) = next | |
push!(dfs_stack, u) | |
if v_is_large | |
push!(largev_iterstate_stack, next) # Because this is the else branch of isnothing(state), we can push this on the stack. | |
end | |
if is_large_vertex(g, u) | |
push!(largev_iterstate_stack, iterate(outneighbors(g, u))) # Because u is large, iterate cannot return nothing, so we can push this on stack. | |
end | |
is_component_root[u] = true | |
rindex[u] = count | |
count -= one_count | |
# next iteration of while loop will expand the DFS tree from u. | |
end | |
end | |
end | |
end | |
#Unlike in the original Tarjans, rindex are potentially also worth returning here. | |
# For any v, v is in components[rindex[v]], s it acts as a lookup table for components. | |
# Scipy's graph library returns only that and lets the user sort by its values. | |
return components # ,rindex | |
end | |
# # Example graph from the wikipedia article. | |
example_graph = SimpleDiGraph(8); | |
add_edge!(example_graph,1,2) | |
add_edge!(example_graph,2,3) | |
add_edge!(example_graph,3,1) | |
add_edge!(example_graph,4,2) | |
add_edge!(example_graph,4,3) | |
add_edge!(example_graph,4,5) | |
add_edge!(example_graph,5,4) | |
add_edge!(example_graph,5,6) | |
add_edge!(example_graph,6,3) | |
add_edge!(example_graph,6,7) | |
add_edge!(example_graph,7,6) | |
add_edge!(example_graph,8,5) | |
add_edge!(example_graph,8,7) | |
add_edge!(example_graph,8,8) | |
# Various large special graphs. | |
println("Constructing large random graphs...") | |
example_graphs_labels = ["path_digraph(10000)", "random_regular_digraph(50000,3)", "random_regular_digraph(50000,8)", "random_regular_digraph(50000,200)"," random_orientation_dag(random_regular_graph(50000,200))", "random_regular_digraph(50000,2000)"," random_tournament_digraph(10000)"," random_orientation_dag(complete_graph(10000))"," star_digraph(100000)"] | |
example_graphs = [ path_digraph(10000), random_regular_digraph(50000,3), random_regular_digraph(50000,8), random_regular_digraph(50000,200), random_orientation_dag(random_regular_graph(50000,200)) , random_regular_digraph(50000,2000), random_tournament_digraph(10000), random_orientation_dag(complete_graph(10000)), star_digraph(100000)] | |
println("Benchmarking:") | |
for f in (strongly_connected_components,strongly_connected_components_2) | |
println(" ------------------------------") | |
println("testing function $(string(f)): ") | |
for i in eachindex(example_graphs) | |
gg = example_graphs[i] | |
println(example_graphs_labels[i]) | |
@assert sort.(f(gg)) == sort.(strongly_connected_components(gg)) "incorrect implementation" | |
@btime ($f($(gg));) | |
end | |
end | |
# for i in eachindex(example_graphs) | |
# println(" ------------------------------") | |
# gg = example_graphs[i] | |
# println(example_graphs_labels[i]) | |
# @btime $f($(gg)) | |
# end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment