2015-01-29

IntroSort vs CombSort vs ShellSort vs HeapSort vs MergeSort

As we already know, that GCC's std::sort uses IntroSort (a QuickSort algorithm that changes into HeapSort when recursion depth is too deep). Also as we already know that CombSort is improved version of BubbleSort, and ShellSort is improved version of InsertionSort), but sometimes we don't know, how good are they in real-life benchmark. So today I tried to compare them all (with addition HeapSort and inplace MergeSort). My setup is 64-bit Linux 3.18.2-1, Intel i3-4150, 8 GB RAM, Non-SSD disk, here's the code:

// intro.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
const int N = 10000000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    sort(arr,arr+N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// comb.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
int newGap(int gap){
    gap /= 1.3;
    if(gap == 9 || gap == 10) gap = 11;
    if(gap < 1) return 1;
    return gap;
}
void combSort(int a[], int len){
    int gap = len;
    bool swapped;
    do {
        swapped = false;
        gap = newGap(gap);
        for(int i=0; i < len-gap; ++i) {
            if(a[i] > a[i+gap]) {
                swapped = true;
                swap(a[i], a[i+gap]);
            }
        }
    } while(gap > 1 || swapped);
}
const int N = 10000000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();    
    combSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// shell.cpp
#include<cstdlib>
#include<cstdio>
#include<deque>
#include<algorithm>
using namespace std;
void shellSortPhase(int a[], int length, int gap) {
    int i;
    for (i = gap; i < length; ++i) {
        int value = a[i];
        int j;
        for (j = i - gap; j >= 0 && a[j] > value; j -= gap) a[j + gap] = a[j];
        a[j + gap] = value;
    }
}
void shellSort(int a[], size_t length) { 
    deque<int> gaps;
    for(size_t z=length/2;z>701;z/=2) gaps.push_front(z);
    static int cuira[] = {701,301,132,57,23,10,4,1};
    for(int z=0;z<sizeof(cuira)/sizeof(int);++z) gaps.push_front(cuira[z]);
    while(gaps.size()) {
        shellSortPhase(a, length, gaps.back());
        gaps.pop_back();
    }
}
const int N = 10000000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();    
    shellSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");    
    delete[] arr;
}

// heap.cpp
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<iostream>
using namespace std;
const int N = 10000000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    make_heap(arr,arr+N);
    sort_heap(arr,arr+N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// merge.cpp
#include<vector>
#include<string>
#include<algorithm>
#include<cstdio>
using namespace std;
void merge_sort( int* beg, int* end) {
    if (end - beg > 1)  {
        int* mid = beg + (end - beg) / 2;
        merge_sort(beg, mid);
        merge_sort(mid, end);
        inplace_merge(beg, mid, end);
    }
}
const int N = 10000000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    merge_sort(arr,arr+N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

The benchmark result for 10M items:

$ g++ intro.cpp ; time ./a.out 
real    0m2.515s
user    0m2.503s
sys     0m0.007s

$ g++ comb.cpp ; time ./a.out 
real    0m3.457s
user    0m3.443s
sys     0m0.007s

$ g++ shell.cpp ; time ./a.out 
real    0m4.109s
user    0m4.103s
sys     0m0.003s

$ g++ heap.cpp ; time ./a.out 
real    0m4.478s
user    0m4.470s
sys     0m0.003s

$ g++ merge.cpp ; time ./a.out 
real    0m3.748s
user    0m3.743s
sys     0m0.007s

Result with -O2 flag:

$ g++ -O2 intro.cpp ; time ./a.out 
real    0m0.806s
user    0m0.797s
sys     0m0.007s

$ g++ -O2 comb.cpp ; time ./a.out 
real    0m1.421s
user    0m1.417s
sys     0m0.000s

$ g++ -O2 shell.cpp ; time ./a.out 
real    0m1.959s
user    0m1.950s
sys     0m0.003s

$ g++ -O2 heap.cpp ; time ./a.out 
real    0m2.373s
user    0m2.357s
sys     0m0.010s

$ g++ -O2 merge.cpp ; time ./a.out
real    0m1.384s
user    0m1.360s
sys     0m0.017s

As we could see both are quite good enough, at par with some O(n log n) sorting algorithms.

Just for comparison let's compare with O(n^2) sorting algorithms, with lower number of items:

// sel.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void selSort(int a[], size_t length) {
    for(size_t z=0;z<length-1;++z) {
        size_t best = z;
        for(size_t y=z+1;y<length;++y) if(a[best]>a[y]) best = y;
        if(z!=best) swap(a[best],a[z]);
    }    
}
const int N = 200000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    selSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// bub.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void bubSort(int a[], size_t length) {
    for(size_t z=length-1;z>0;--z) {
        for(size_t y=0;y<z;++y) if(a[y]>a[y+1]) swap(a[y],a[y+1]);
    }    
}
const int N = 200000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    bubSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void insSort(int a[], size_t length) {
    for(size_t z=1;z<length;++z) {
    int copy = a[z];    
    size_t y = z;
        for(;y>0 && a[y-1]>copy;--y) a[y] = a[y-1];
        a[y] = copy;
    }    
}
const int N = 200000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    insSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

// bin-ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void binInsSort(int data[], int size) {
    int index = -1,i = 1,j;
    for(;i<size;i++) {
        int temp = data[i];
        int high = i,low = 0,mid;
        while(low <= high) {
            mid = (low + high) /2;
            if(temp < data[mid]) {
                high = mid - 1;
                index = mid;
            } else if (temp > data[mid]) {
                low = mid + 1;      
            } else if(data[mid] == temp) {
                index = mid;
                break; 
            } 
        }
        for(j = i;j > index;j--) data[j] = data[j-1];
        data[j] = temp;
    }   
}
const int N = 200000;
int main() {
    int* arr = new int[N];
    for(int z=0;z<N;++z) arr[z] = rand();
    binInsSort(arr,N);
    for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
    delete[] arr;
}

The benchmark result for 200K items:

$ g++ -O2 bub.cpp ; time ./a.out 
real    0m54.837s
user    0m54.790s
sys     0m00.000s

$ g++ -O2 sel.cpp ; time ./a.out 
real    0m51.671s
user    0m51.623s
sys     0m00.000s

$ g++ -O2 ins.cpp ; time ./a.out
real    0m4.513s
user    0m4.507s
sys     0m0.000s

$ g++ -O2 bin-ins.cpp ; time ./a.out 
real    0m3.736s
user    0m3.730s
sys     0m0.000s

The benchmark for 500K items:

$ g++ -O2 ins.cpp ; time ./a.out 
real    0m31.179s
user    0m31.137s
sys     0m00.003s

$ g++ -O2 bin-ins.cpp ; time ./a.out 
real    0m24.109s
user    0m24.087s
sys     0m00.000s


Apparently I was wrong, till recently I thought that SelectionSort is faster than InsertionSort (because of lower number of write/swap), but now I know that it doesn't..