3 way quicksort (C implementation)

2.4k Views Asked by At

I try to implement some of the algorithms pure generic using C. I stick with the 3-way quicksort but somehow the implementation does not give correct output. The output nearly sorted but some keys aren't where it should be. The code is below. Thanks in advance.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

static void swap(void *x, void *y, size_t size) {
    void *tmp = malloc(size);

    memcpy(tmp, x, size);
    memcpy(x, y, size);
    memcpy(y, tmp, size);

    free(tmp);
}

static int cmpDouble(const void *i, const void *j) {
    if (*(double *)i < *(double *)j)
        return 1;
    else if (*(double *)i == *(double *)j)
        return 0;
    else 
        return -1;
}

void qsort3way(void *base, int lo, int hi, size_t size,
               int (*cmp)(const void *, const void *)) {
    if (hi <= lo)
        return;
    else {
        char *ptr = (char*)base;
        char *v = ptr + lo * size;

        int lt = lo, gt = hi;
        int i = lo;
        while (i <= gt) {
            int c = cmp(v, ptr + i * size);
            if (c < 0)
                swap(ptr + (lt++) * size, ptr + (i++) * size, size);
            else if (c > 0)
                swap(ptr + i * size, ptr + (gt--) * size, size);    
            else 
                i++;
        }

        qsort3way(base, lo, lt - 1, size, cmp);
        qsort3way(base, gt + 1, hi, size, cmp);
    }     
}

int main(void) {
    int i;
    double *d = (double*)malloc(sizeof(double) * 100);

    for (i = 0; i < 100; i++)
        d[i] = (double)rand();

    qsort3way(d, 0, 100 -1, sizeof(double), cmpDouble);

    for (i = 0; i < 100; i++)
        printf("%.10lf\n", d[i]);

    free(d);
    return 0;
}

sample output:

   41.0000000000
   153.0000000000
   288.0000000000
   2082.0000000000
   292.0000000000
   1869.0000000000
   491.0000000000
   778.0000000000
   1842.0000000000
   6334.0000000000
   2995.0000000000
   8723.0000000000
   3035.0000000000
   3548.0000000000
   4827.0000000000
   3902.0000000000
   4664.0000000000
   5436.0000000000
   4966.0000000000
   5537.0000000000
   5447.0000000000
   7376.0000000000
   5705.0000000000
   6729.0000000000
   6868.0000000000
   7711.0000000000
   9961.0000000000
   8942.0000000000
   9894.0000000000
   9040.0000000000
   9741.0000000000
3

There are 3 best solutions below

0
On BEST ANSWER

Your implementation is incorrect because the pivot may move during the partitioning phase and you use a pointer for the comparison which no longer points to it. Implementations in other languages use the value of the pivot instead of its address.

Note also these shortcomings:

  • recursing both ways may cause stack overflow on pathological distributions. In you case, an array that is already sorted is a pathological distribution.
  • the comparison function should return the opposite values: -1 if a < b, +1 is a > b and 0 if a == b.
  • the API is non-standard and confusing: you should pass the number of elements instead of a range with included bounds.

Here is a corrected and commented version:

#include <stdio.h>
#include <stdlib.h>

static void swap(unsigned char *x, unsigned char *y, size_t size) {
    /* sub-optimal, but better than malloc */
    while (size-- > 0) {
        unsigned char c = *x;
        *x++ = *y;
        *y++ = c;
    }
}

void qsort3way(void *base, int n, size_t size,
               int (*cmp)(const void *, const void *))
{
    unsigned char *ptr = (unsigned char *)base;

    while (n > 1) {
        /* use first element as pivot, pointed to by lt */
        int i = 1, lt = 0, gt = n;
        while (i < gt) {
            int c = cmp(ptr + lt * size, ptr + i * size);
            if (c > 0) {
                /* move smaller element before the pivot range */
                swap(ptr + lt * size, ptr + i * size, size);
                lt++;
                i++;
            } else if (c < 0) {
                /* move larger element to the end */
                gt--;
                swap(ptr + i * size, ptr + gt * size, size);
                /* test with that element again */
            } else {
                /* leave identical element alone */
                i++;
            }
        }
        /* array has 3 parts:
         * from 0 to lt excluded: elements smaller than pivot
         * from lt to gt excluded: elements identical to pivot
         * from gt to n excluded: elements greater than pivot
         */
        /* recurse on smaller part, loop on larger to minimize
           stack use for pathological distributions */
        if (lt < n - gt) {
            qsort3way(ptr, lt, size, cmp);
            ptr += gt * size;
            n -= gt;
        } else {
            qsort3way(ptr + gt * size, n - gt, size, cmp);
            n = lt;
        }
    }
}    

static int cmp_double(const void *i, const void *j) {
    /* this comparison function does not handle NaNs */
    if (*(const double *)i < *(const double *)j)
        return -1;
    if (*(const double *)i > *(const double *)j)
        return +1;
    else
        return 0;
}

int main(void) {
    double d[100];
    int i;

    for (i = 0; i < 100; i++)
        d[i] = rand() / ((double)RAND_MAX + 1);

    qsort3way(d, 100, sizeof(*d), cmp_double);

    for (i = 0; i < 100; i++)
        printf("%.10lf\n", d[i]);

    return 0;
}
1
On

The implementation does not give the correct result because it is wrong. Pretty badly wrong, in fact, given that it's supposed to be a three-way quicksort and not a regular one.

One basic problem is that you've omitted the bit where you move the pivot(s) into their proper position after the main partitioning loop. For standard quicksort, that requires one extra swap or assignment after the loop, depending on implementation details. For a three-way quicksort that involves one or two extra loops to move the potentially-many values equal to the pivot into their positions.

A more insidious problem is the one @Stargateur first pointed out: you track the pivot element by pointer, not by value, and you (sometimes) swap the original value out from that position in the course of the partitioning loop.

Furthermore, your main partitioning loop is wrong for a three-way quicksort, too. When you encounter an element equal to the pivot you just leave it in place, but you need instead to move it to one end or the other (or to some kind of auxiliary storage, if you're willing to incur that memory cost) so that you can perform that move to the middle at the end. In a sense, the previous problem is a special case of this one -- you're not reserving space for or tracking the pivot values. Fixing this will solve the previous problem as well.

I'm not sure what reference you used to prepare your implementation, or whether you built it from scratch, but Geeks for Geeks has a C++ (but pretty much also C) implementation for int arrays that you might want to check out.

6
On

After reading the book link that you provide to @JohnBollinger. I understand how your algorithm work. Your problem is that your pivot move, but you don't change the value of v. Your pivot is at the index lt

char *ptr = base;

int lt = lo, gt = hi; // lt is the pivot
int i = lo + 1; // we don't compare pivot with itself
while (i <= gt) {
  int c = cmp(ptr + lt * size, ptr + i * size);
  if (c < 0) {
    swap(ptr + lt++ * size, ptr + i++ * size, size);
  }
  else if (c > 0)
    swap(ptr + i * size, ptr + gt-- * size, size);
  else
    i++;
}
qsort3way(base, lo, lt - 1, size, cmp);
qsort3way(base, gt + 1, hi, size, cmp);

I propose you a "proper" solution:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef void qsort3way_swap(void *a, void *b);
typedef int qsort3way_cmp(void const *a, void const *b);

static void qsort3way_aux(char *array_begin, char *array_end, size_t size,
                          qsort3way_cmp *cmp, qsort3way_swap *swap) {
  if (array_begin < array_end) {
    char *i = array_begin + size;
    char *lower = array_begin;
    char *greater = array_end;
    while (i < greater) {
      int ret = cmp(lower, i);
      if (ret < 0) {
        swap(i, lower);
        i += size;
        lower += size;
      } else if (ret > 0) {
        greater -= size;
        swap(i, greater);
      } else {
        i += size;
      }
    }
    qsort3way_aux(array_begin, lower, size, cmp, swap);
    qsort3way_aux(greater, array_end, size, cmp, swap);
  }
}

static void qsort3way(void *array_begin, void *array_end, size_t size,
                      qsort3way_cmp *cmp, qsort3way_swap *swap) {
  qsort3way_aux(array_begin, array_end, size, cmp, swap);
}

static void swap_int_aux(int *a, int *b) {
  int tmp = *a;
  *a = *b;
  *b = tmp;
}

static void swap_int(void *a, void *b) { swap_int_aux(a, b); }

static int cmp_int_aux(int const *a, int const *b) {
  if (*a < *b) {
    return 1;
  } else if (*a > *b) {
    return -1;
  } else {
    return 0;
  }
}

static int cmp_int(void const *a, void const *b) { return cmp_int_aux(a, b); }

static void print_int(char const *intro, int const *array, size_t const size) {
  printf("%s:", intro);
  for (size_t i = 0; i < size; i++) {
    printf(" %d", array[i]);
  }
  printf("\n");
}

#define SIZE 42

int main(void) {
  int array[SIZE];

  srand((unsigned int)time(NULL));
  for (size_t i = 0; i < SIZE; i++) {
    array[i] = rand() % SIZE - SIZE / 2;
  }

  print_int("before", array, SIZE);

  qsort3way(array, array + SIZE, sizeof *array, cmp_int, swap_int);

  print_int("after", array, SIZE);
}

Note: The optimization int i = lo + 1; and char *i = array_begin + size; are mandatory. Because in the case where the function compare return that pivot != pivot this will lead to a infinite recursion. How this would be possible?

  1. The function cmp is bug.
  2. double has strange power... A double can be not equal to itself! (-nan).