why do we create a transpose of the graph and then run dfs on the transpose in the second pass.I've tried reading proof of correctness http://www.jeffreykarres.com/blog/kosaraju/ online but couldn't understand is there some alternative approach to do this which is easy to understand
here is my implementation of the algorithm it takes number of vetices and edges as inputs and then takes directed edges as inputs(vertices are numbered 0 to n-1)
#include <bits/stdc++.h>
using namespace std;
void dfsForward(int src , vector<vector<int>> g , vector<bool> &vis, stack<int> &s ){
vis[src]=true;
for(int i=0;i<g[src].size();i++){
if(!vis[g[src][i]])
dfsForward(g[src][i],g,vis,s);
}
s.push(src);
}
void dfsRev(int src , vector<vector<int>> t , vector<bool> &vis, vector<int> &comp,int count ){
vis[src]=true;
for(int i=0;i<t[src].size();i++){
if(!vis[t[src][i]]){
comp.push_back(t[src][i]);
dfsRev(t[src][i],t,vis,comp,count);
}
}
}
vector<vector<int>> kosaraju(vector<vector<int>> g,vector<vector<int>> t, int n){
vector<bool> vis(n,false);
vector<bool> visRev(n,false);
vector<vector<int>> scc;
int count=0;
stack<int> s;
for(int i=0;i<n;i++){
if(!vis[i])
dfsForward(i,g,vis,s);
}
while(!s.empty()){
int temp =s.top();
s.pop();
if(!visRev[temp]){
count++;
vector<int> comp;
comp.push_back(temp);
dfsRev(temp,t,visRev,comp,count);
scc.push_back(comp);
}
}
return scc;
}
int main() {
int n,e,u,v;
cin>>n>>e;
vector<vector<int>> g(n);
vector<vector<int>> t(n);
for(int i=0;i<e;i++){
cin>>u>>v;
g[u].push_back(v);
t[v].push_back(u);
}
cout<<"components are "<<endl;
vector<vector<int>> scc = kosaraju(g,t,n);
for(int i=0;i<scc.size();i++){
vector<int> arr = scc[i];
for(int j=0;j<arr.size();j++){
cout<<arr[j]<<" ";
}
cout<<endl;
}
return 0;
}
Kosaraju algorithm works in three simple steps:
As you know, SCCs in Graph G and reverse graph G' will always be the same. Consider each SCC a separate node N, which defines the "graphs of SCCs" S:=G and S':= G', then both S and S' will be DAGs (directed acyclic graphs).
When we ran DFS on G' terminating at v, we have effectively also run DFS on S' terminating at N, and we must have v being a member of N. But (*) states that N is a sink vertex. So if you re-run DFS on G from v, it will only iterate through N. Hence this way iteratively you will uncover all the SCCs in the graph.