qsort performance on mac os x

197 Views Asked by At

I have a program that generates 10000 integers from a set of 1000000 (may contain duplicates). The file generator function is below :

void generateNumberFile() {

    const char* filename = "/tmp/integerfile.csv";
    FILE *numberFile = fopen(filename, "w");

    int number_idx;
    int random_integer;
    struct timeval t1;
    for (number_idx = 0; number_idx < NUMBER_OF_ELEMENTS - 1; number_idx++) {

        gettimeofday(&t1, NULL);
        srand(t1.tv_usec * t1.tv_sec);
        random_integer = ((float)rand()/(float)RAND_MAX) * NUMBER_OF_INTEGERS;
        if (number_idx == 0)
            printf("random number at idx %d : %d\n", number_idx, random_integer);
        fprintf(numberFile, "%d,", random_integer);
    }
    random_integer = ((float)rand()/(float)RAND_MAX) * NUMBER_OF_INTEGERS;
    fprintf(numberFile, "%d", random_integer);

    printf("last number generated : %d\n", random_integer);
    fclose(numberFile);
}

I then implemented a sort on a bit array while reading the file with the numbers (below) :

double space_optimized_sort(int *bit_array) {

    clock_t begin, end;
    double time_spent;

    begin = clock();

    char *filename = "/tmp/integerfile.csv";
    FILE *numberFile = fopen(filename, "r");

    char* number = malloc(10*sizeof(char));
    char* digit = malloc(2*sizeof(char));
    int int_number;
    while ((digit = fgets(digit, 2, numberFile)) != NULL) {

        digit[1] = '\0';
        if (strcmp(digit, ",") == 0) {

            sscanf(number, "%d", &int_number);
            bit_array[int_number >> sizeof(int)] &= (1 << (int_number & MASK));

            *number = '\0';
        } else {
            strcat(number, digit);
        }
    }
    fclose(numberFile);

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

    return time_spent;
}

I then implemented another function that reads all the numbers into memory and then sorts them using qsort.

int integer_cmp(const void *keyval, const void *datum) {

    int i1 = *(int*)keyval;
    int i2 = *(int*)datum;

    return i1 - i2;
}

double sort_system_function() {

    clock_t begin, end;
    double time_spent;

    begin = clock();

    char *filename = "/tmp/integerfile.csv";
    FILE *numberFile = fopen(filename, "r");

    char* number = malloc(10*sizeof(char));
    char* digit = malloc(2*sizeof(char));
    int int_number;
    int number_array[NUMBER_OF_ELEMENTS];
    int element_counter = 0;
    while ((digit = fgets(digit, 2, numberFile)) != NULL) {

        digit[1] = '\0';

        if (strcmp(digit, ",") == 0) {

            sscanf(number, "%d", &int_number);
            number_array[element_counter] = int_number;
            element_counter++;
            *number = '\0';
        } else {
            strcat(number, digit);
        }
    }
    fclose(numberFile);

    // sort using the std lib
    qsort(number_array, NUMBER_OF_ELEMENTS, sizeof(int), &integer_cmp);

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;

    return time_spent;
}

Heres the main function :

int main(int argc, char** argv) {

    int number_of_bytes = NUMBER_OF_INTEGERS / INT_SIZE;
    int iteration_counter;
    double total_time_spent = 0;
    double average_time_spent = 0;
    for (iteration_counter = 0; iteration_counter < NUMBER_ITERATIONS; iteration_counter++) {

        generateNumberFile();
        total_time_spent += sort_system_function();
    }
    average_time_spent = total_time_spent / NUMBER_ITERATIONS;
    printf("system sort average time spent : %f\n", average_time_spent);

    int *bit_array = malloc(number_of_bytes*sizeof(int));
    total_time_spent = 0.0;
    for (iteration_counter = 0; iteration_counter < NUMBER_ITERATIONS; iteration_counter++) {

        generateNumberFile();
        total_time_spent += space_optimized_sort(bit_array);
    }
    average_time_spent = total_time_spent / NUMBER_ITERATIONS;
    printf("space constrained sort average time spent : %f\n", average_time_spent);

    return 0;
}

Runnning the program without profiling gives the following output (on a mac) :

system sort average time spent : 0.016194
space constrained sort average time spent : 0.014640

Since I don't have gprof on my mac I ran the same program in a Ubuntu VM running on VMWare Fusion. The values I got are an order of magnitude different from what I got on the native OS (running without profiling information compiled into the executable):

system sort average time spent : 0.007830
space constrained sort average time spent : 0.006710

I then compiled the code with the -pg (profiling) flag and ran it on The VM. Then ran gprof on the output. The results are below :

system sort average time spent : 0.008800
space constrained sort average time spent : 0.005600

The outputs from gprof are in the images below (could not copy paste) :

gprof_p

gprof_q

Two questions :

Why is the order of magnitude different in the native OS run and the VM run (being the latter smaller) ?

In the gprof output (first image) does the time spent in the function 'integer_cmp' is included in the function 'sort_system_function' ? (which is where the sort function that uses it called)

0

There are 0 best solutions below