My project is to create a graph with 1-2 milions nodes using adjacency list. Input data is from text file. I need to count the connected components and max/min degree of vetices of each components. My code can work fine with small number of nodes, but if the nodes are too big, it can't. It always stop at addEdge fucntion and run out of memory.
Edit: before posting, i have already change the fomat of input file so it match my progaram, but it still crash anyway.

Hope sb can give me some answers !
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdbool.h>
struct Node {
struct Node* next;
int vertex;
};
struct Graph {
struct Node** adj_list;
int num_vertices;
};
struct Node* createNode(int v) {
struct Node *newNode = (struct Node*)malloc(sizeof(struct Node));
newNode->next = NULL;
newNode->vertex = v;
return newNode;
}
void addEdge(struct Graph* graph, FILE* file) {
printf("\nSTART create EDGE");
int src,dest;
struct Node* check,* newNode;
while (fscanf(file, "%d\t%d", &src, &dest) != EOF) {
newNode=createNode(dest);
if (graph->adj_list[src] !=NULL) {
check = graph->adj_list[src];
while (check->next) {
check = check->next;
}
check->next= newNode;
printf("\nENDING 2 create EDGE");
}
else {
graph->adj_list[src] = newNode;
printf("\nENDING 1 create EDGE");
}
}
}
struct Graph* createGraph(int num_vertices) {
struct Graph* graph = (struct Graph*)malloc(sizeof(struct Graph));
graph->adj_list = (struct Node**)malloc(num_vertices * sizeof(struct Node*));
graph->num_vertices = num_vertices;
int i;
for (i = 0; i < num_vertices; ++i) {
graph->adj_list[i] = NULL;
}
return graph;
}
void printGraph(struct Graph* graph)
{
int v;
for (v = 0; v < graph->num_vertices; v++) {
if(graph->adj_list[v]!=NULL){
struct Node* pCrawl = graph->adj_list[v];
printf("\n Adjacency list of vertex %d\n head ", v);
while (pCrawl->next !=NULL) {
printf("-> %d", pCrawl->vertex);
pCrawl = pCrawl->next;
}
printf("-> %d", pCrawl->vertex);
printf("\n");
}
}
}
void BFS(struct Graph* graph, int start_vertex, bool* visited, int* num_vertices, int* num_edges, int* max_degree, int* min_degree) {
int* queue = (int*)malloc(graph->num_vertices * sizeof(int));
struct Node* temp=NULL;
int front = 0, rear = 0;
int current_vertex;
visited[start_vertex] = true;
queue[rear++] = start_vertex;
while (front < rear) {
int degree=0;
current_vertex = queue[front++];
++(*num_vertices);
temp = graph->adj_list[current_vertex];
while (temp) {
int adj_vertex = temp->vertex;
++(*num_edges);
if (!visited[adj_vertex]) {
visited[adj_vertex] = true;
queue[rear++] = adj_vertex;
}
++degree;
temp = temp->next;
}
*max_degree = (*max_degree <= degree) ? degree : *max_degree;
*min_degree = (*min_degree >= degree) ? degree : *min_degree;
}
free(queue);
}
void findConnectedComponents(struct Graph* graph) {
bool* visited = (bool*)calloc(graph->num_vertices, sizeof(bool));
for (int i = 0; i < graph->num_vertices; i++) {
visited[i] = false;
}
int num_components = 0;
int v;
for (v = 0; v < graph->num_vertices; v++) {
if (graph->adj_list[v]!=NULL &&!visited[v]) {
num_components++;
int num_vertices = 0;
int num_edges = 0;
int component_max_degree = 0;
int component_min_degree = graph->num_vertices;
BFS(graph, v, visited, &num_vertices, &num_edges, &component_max_degree, &component_min_degree);
printf("Connected Component %d:\n", num_components);
printf("Number of vertices: %d\n", num_vertices);
printf("Number of edges: %d\n", num_edges);
printf("Max degree: %d\n", component_max_degree);
printf("Min degree: %d\n", component_min_degree);
}
}
printf("Total number of connected components: %d\n",num_components);
free(visited);
}
int main() {
char filename[] = "roadNet-CA.txt";
FILE* file = fopen(filename, "r+");
if (file == NULL) {
printf("Failed to open the file.\n");
return -1;
}
int num_vertices;
fscanf(file, "%d", &num_vertices);
struct Graph* graph = createGraph(num_vertices);
addEdge(graph, file);
fclose(file);
printGraph(graph);
printf("Breadth-First Search (BFS): ");
findConnectedComponents(graph);
printf("\n");
return 0;
}
Here are my input file https://snap.stanford.edu/data/roadNet-CA.html
The problem is not that your program runs out of memory. The problem is that the input file format does not match what your program is expecting. But the program doesn't care and tries to read it in anyways, and so it segfaults.
I could give a whole speech about why it's important to sanitize your inputs to prevent nasty bugs like this but you're probably just looking for a quick fix so here you go.
Replace the first 4 lines in your input file with a conservative estimate of the number of nodes
Before:
After: