I have a program that generates 10000 integers from a set of 1000000 (may contain duplicates). The file generator function is below :
void generateNumberFile() {
const char* filename = "/tmp/integerfile.csv";
FILE *numberFile = fopen(filename, "w");
int number_idx;
int random_integer;
struct timeval t1;
for (number_idx = 0; number_idx < NUMBER_OF_ELEMENTS - 1; number_idx++) {
gettimeofday(&t1, NULL);
srand(t1.tv_usec * t1.tv_sec);
random_integer = ((float)rand()/(float)RAND_MAX) * NUMBER_OF_INTEGERS;
if (number_idx == 0)
printf("random number at idx %d : %d\n", number_idx, random_integer);
fprintf(numberFile, "%d,", random_integer);
}
random_integer = ((float)rand()/(float)RAND_MAX) * NUMBER_OF_INTEGERS;
fprintf(numberFile, "%d", random_integer);
printf("last number generated : %d\n", random_integer);
fclose(numberFile);
}
I then implemented a sort on a bit array while reading the file with the numbers (below) :
double space_optimized_sort(int *bit_array) {
clock_t begin, end;
double time_spent;
begin = clock();
char *filename = "/tmp/integerfile.csv";
FILE *numberFile = fopen(filename, "r");
char* number = malloc(10*sizeof(char));
char* digit = malloc(2*sizeof(char));
int int_number;
while ((digit = fgets(digit, 2, numberFile)) != NULL) {
digit[1] = '\0';
if (strcmp(digit, ",") == 0) {
sscanf(number, "%d", &int_number);
bit_array[int_number >> sizeof(int)] &= (1 << (int_number & MASK));
*number = '\0';
} else {
strcat(number, digit);
}
}
fclose(numberFile);
end = clock();
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
return time_spent;
}
I then implemented another function that reads all the numbers into memory and then sorts them using qsort.
int integer_cmp(const void *keyval, const void *datum) {
int i1 = *(int*)keyval;
int i2 = *(int*)datum;
return i1 - i2;
}
double sort_system_function() {
clock_t begin, end;
double time_spent;
begin = clock();
char *filename = "/tmp/integerfile.csv";
FILE *numberFile = fopen(filename, "r");
char* number = malloc(10*sizeof(char));
char* digit = malloc(2*sizeof(char));
int int_number;
int number_array[NUMBER_OF_ELEMENTS];
int element_counter = 0;
while ((digit = fgets(digit, 2, numberFile)) != NULL) {
digit[1] = '\0';
if (strcmp(digit, ",") == 0) {
sscanf(number, "%d", &int_number);
number_array[element_counter] = int_number;
element_counter++;
*number = '\0';
} else {
strcat(number, digit);
}
}
fclose(numberFile);
// sort using the std lib
qsort(number_array, NUMBER_OF_ELEMENTS, sizeof(int), &integer_cmp);
end = clock();
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
return time_spent;
}
Heres the main function :
int main(int argc, char** argv) {
int number_of_bytes = NUMBER_OF_INTEGERS / INT_SIZE;
int iteration_counter;
double total_time_spent = 0;
double average_time_spent = 0;
for (iteration_counter = 0; iteration_counter < NUMBER_ITERATIONS; iteration_counter++) {
generateNumberFile();
total_time_spent += sort_system_function();
}
average_time_spent = total_time_spent / NUMBER_ITERATIONS;
printf("system sort average time spent : %f\n", average_time_spent);
int *bit_array = malloc(number_of_bytes*sizeof(int));
total_time_spent = 0.0;
for (iteration_counter = 0; iteration_counter < NUMBER_ITERATIONS; iteration_counter++) {
generateNumberFile();
total_time_spent += space_optimized_sort(bit_array);
}
average_time_spent = total_time_spent / NUMBER_ITERATIONS;
printf("space constrained sort average time spent : %f\n", average_time_spent);
return 0;
}
Runnning the program without profiling gives the following output (on a mac) :
system sort average time spent : 0.016194
space constrained sort average time spent : 0.014640
Since I don't have gprof on my mac I ran the same program in a Ubuntu VM running on VMWare Fusion. The values I got are an order of magnitude different from what I got on the native OS (running without profiling information compiled into the executable):
system sort average time spent : 0.007830
space constrained sort average time spent : 0.006710
I then compiled the code with the -pg (profiling) flag and ran it on The VM. Then ran gprof on the output. The results are below :
system sort average time spent : 0.008800
space constrained sort average time spent : 0.005600
The outputs from gprof are in the images below (could not copy paste) :
Two questions :
Why is the order of magnitude different in the native OS run and the VM run (being the latter smaller) ?
In the gprof output (first image) does the time spent in the function 'integer_cmp' is included in the function 'sort_system_function' ? (which is where the sort function that uses it called)