I want to create a graph with 2-3 million vertices using an adjacency list. The input is generated randomly. When I ran a version that only prints the increasing count of edges added, it worked perfectly (returned 0). But when I add the BFS and DFS, it prints only about 80% of the counts and then exits with 123456789.
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
struct Node {
    int vertex;
    struct Node* next;
};
struct Graph {
    int num_vertices;
    struct Node** adj_list;
};
struct Node* createNode(int v) {
    struct Node* newNode = (struct Node*)malloc(sizeof(struct Node));
    newNode->vertex = v;
    newNode->next = NULL;
    return newNode;
}
struct Graph* createGraph(int num_vertices) {
    struct Graph* graph = (struct Graph*)malloc(sizeof(struct Graph));
    graph->num_vertices = num_vertices;
    graph->adj_list = (struct Node**)malloc(num_vertices * sizeof(struct Node*));
    int i;
    for (i = 0; i < num_vertices; i++) {
        graph->adj_list[i] = NULL;
    }
    return graph;
}
void addEdge(struct Graph* graph, int src, int dest) {
    struct Node* newNode = createNode(dest);
    newNode->next = graph->adj_list[src];
    graph->adj_list[src] = newNode;
    newNode = createNode(src);
    newNode->next = graph->adj_list[dest];
    graph->adj_list[dest] = newNode;
}
void DFSUtil(struct Graph* graph, int v, int* visited) {
    visited[v] = 1;
    printf("%d ", v);
    struct Node* temp = graph->adj_list[v];
    while (temp) {
        int adj_vertex = temp->vertex;
        if (!visited[adj_vertex]) {
            DFSUtil(graph, adj_vertex, visited);
        }
        temp = temp->next;
    }
}
void DFS(struct Graph* graph, int start_vertex) {
    int* visited = (int*)calloc(graph->num_vertices, sizeof(int));
    DFSUtil(graph, start_vertex, visited);
    free(visited);
}
void BFS(struct Graph* graph, int start_vertex) {
    int* visited = (int*)calloc(graph->num_vertices, sizeof(int));
    int* queue = (int*)malloc(graph->num_vertices * sizeof(int));
    int front = 0, rear = 0;
    visited[start_vertex] = 1;
    queue[rear++] = start_vertex;
    while (front < rear) {
        int current_vertex = queue[front++];
        printf("%d ", current_vertex);
        struct Node* temp = graph->adj_list[current_vertex];
        while (temp) {
            int adj_vertex = temp->vertex;
            if (!visited[adj_vertex]) {
                visited[adj_vertex] = 1;
                queue[rear++] = adj_vertex;
            }
            temp = temp->next;
        }
    }
    free(visited);
    free(queue);
}
int main() {
    int num_vertices = 50000000;
    int num_edges = 10000000;
    struct Graph* graph = createGraph(num_vertices);
    srand(time(NULL));
    int i;
    for (i = 0; i < num_edges; i++) {
        int src = rand() % num_vertices;
        int dest = rand() % num_vertices;
        addEdge(graph, src, dest);
        // print the number of edges added so far
        printf("\ncount: %d", i);
    }
    // BFS and DFS
    int start_vertex = 0;
    printf("Depth-First Search (DFS): ");
    DFS(graph, start_vertex);
    printf("\n");
    printf("Breadth-First Search (BFS): ");
    BFS(graph, start_vertex);
    printf("\n");
    return 0;
}
If I change the values of num_vertices and num_edges to smaller ones:
int num_vertices = 1000000;
int num_edges = 2000000;
the code runs to completion and returns 0, but if I change them to bigger ones:
int num_vertices = 10000000;
int num_edges = 20000000;
the problem comes back. I suspect the numbers are just too big, but I don't know why, or how to solve it.
There are at least two possibilities:
You are exhausting available memory (that your C implementation is willing to let you allocate). As a result, you are performing null-pointer dereferences, and undefined behavior ensues. (Probably the program crashes.)
You are recursing deeply enough to overflow the stack. Just how much stack is available for you to use is system- and context-dependent, but you are already pushing your luck if you rely on a thousand levels. On some systems, the limit is much less; on others, more. (In any case, probably the program crashes.)
If the system can't or won't give you enough memory then you need a beefier system, but you should detect that by checking whether each memory allocation succeeds, and failing gracefully, with an informative diagnostic, if any of them don't.
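For a rough sense of scale: with 50,000,000 vertices, the adj_list pointer array alone is about 400 MB on a typical 64-bit system, every call to addEdge allocates two nodes of at least 16 bytes each, and BFS/DFS each allocate further arrays of num_vertices ints, so you can easily exceed what the OS will grant. A minimal sketch of the kind of check I mean, applied to createNode (the message and the choice to exit are just illustrative):

struct Node* createNode(int v) {
    struct Node* newNode = malloc(sizeof(struct Node));
    if (newNode == NULL) {
        // allocation failed: report it and stop instead of dereferencing NULL
        fprintf(stderr, "out of memory while creating a node\n");
        exit(EXIT_FAILURE);
    }
    newNode->vertex = v;
    newNode->next = NULL;
    return newNode;
}

The same check belongs on the malloc and calloc calls in createGraph, DFS, and BFS.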
If you have a stack overflow then you may be able to overcome that by switching from your recursive version of DFS to an iterative version. Ultimately, though, you are still limited by how much memory the system will let you use, so if you keep increasing the problem size then you will still eventually reach a point where you need a system with more resources.
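As a sketch of what an iterative DFS could look like for your adjacency-list representation, here is a version that uses an explicitly allocated stack of vertex indices instead of the call stack. It marks vertices as visited when they are pushed, so the stack never needs more than num_vertices slots; the visit order may differ from your recursive version, but it is still a depth-first traversal:

// Iterative DFS sketch; relies on the struct Graph / struct Node definitions,
// <stdio.h>, and <stdlib.h> from the question.
void DFS_iterative(struct Graph* graph, int start_vertex) {
    int* visited = calloc(graph->num_vertices, sizeof(int));
    int* stack = malloc(graph->num_vertices * sizeof(int));
    if (visited == NULL || stack == NULL) {
        fprintf(stderr, "out of memory in DFS_iterative\n");
        free(visited);
        free(stack);
        return;
    }
    int top = 0;
    visited[start_vertex] = 1;
    stack[top++] = start_vertex;
    while (top > 0) {
        int v = stack[--top];
        printf("%d ", v);
        // push every unvisited neighbour; marking at push time bounds the stack size
        for (struct Node* temp = graph->adj_list[v]; temp != NULL; temp = temp->next) {
            if (!visited[temp->vertex]) {
                visited[temp->vertex] = 1;
                stack[top++] = temp->vertex;
            }
        }
    }
    free(visited);
    free(stack);
}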