Programming Rants: sort

As we already know, that GCC's std::sort uses IntroSort (a QuickSort algorithm that changes into HeapSort when recursion depth is too deep). Also as we already know that CombSort is improved version of BubbleSort, and ShellSort is improved version of InsertionSort), but sometimes we don't know, how good are they in real-life benchmark. So today I tried to compare them all (with addition HeapSort and inplace MergeSort). My setup is 64-bit Linux 3.18.2-1, Intel i3-4150, 8 GB RAM, Non-SSD disk, here's the code:

// intro.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
const int N = 10000000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
sort(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// comb.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
int newGap(int gap){
gap /= 1.3;
if(gap == 9 || gap == 10) gap = 11;
if(gap < 1) return 1;
return gap;
}
void combSort(int a[], int len){
int gap = len;
bool swapped;
do {
swapped = false;
gap = newGap(gap);
for(int i=0; i < len-gap; ++i) {
if(a[i] > a[i+gap]) {
swapped = true;
swap(a[i], a[i+gap]);
}
}
} while(gap > 1 || swapped);
}
const int N = 10000000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
  combSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// shell.cpp
#include<cstdlib>
#include<cstdio>
#include<deque>
#include<algorithm>
using namespace std;
void shellSortPhase(int a[], int length, int gap) {
int i;
for (i = gap; i < length; ++i) {
int value = a[i];
int j;
for (j = i - gap; j >= 0 && a[j] > value; j -= gap) a[j + gap] = a[j];
a[j + gap] = value;
}
}
void shellSort(int a[], size_t length) {
deque<int> gaps;
for(size_t z=length/2;z>701;z/=2) gaps.push_front(z);
static int cuira[] = {701,301,132,57,23,10,4,1};
for(int z=0;z<sizeof(cuira)/sizeof(int);++z) gaps.push_front(cuira[z]);
while(gaps.size()) {
shellSortPhase(a, length, gaps.back());
gaps.pop_back();
}
}
const int N = 10000000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
  shellSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
  delete[] arr;
}

// heap.cpp
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<iostream>
using namespace std;
const int N = 10000000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
make_heap(arr,arr+N);
sort_heap(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// merge.cpp
#include<vector>
#include<string>
#include<algorithm>
#include<cstdio>
using namespace std;
void merge_sort( int* beg, int* end) {
if (end - beg > 1) {
int* mid = beg + (end - beg) / 2;
merge_sort(beg, mid);
merge_sort(mid, end);
inplace_merge(beg, mid, end);
}
}
const int N = 10000000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
merge_sort(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

The benchmark result for 10M items:

$ g++ intro.cpp ; time ./a.out
real 0m2.515s
user 0m2.503s
sys 0m0.007s

$ g++ comb.cpp ; time ./a.out
real 0m3.457s
user 0m3.443s
sys 0m0.007s

$ g++ shell.cpp ; time ./a.out
real 0m4.109s
user 0m4.103s
sys 0m0.003s

$ g++ heap.cpp ; time ./a.out

real 0m4.478s

user 0m4.470s

sys 0m0.003s

$ g++ merge.cpp ; time ./a.out

real 0m3.748s

user 0m3.743s

sys 0m0.007s

Result with -O2 flag:

$ g++ -O2 intro.cpp ; time ./a.out

real 0m0.806s

user 0m0.797s

sys 0m0.007s

$ g++ -O2 comb.cpp ; time ./a.out

real 0m1.421s

user 0m1.417s

sys 0m0.000s

$ g++ -O2 shell.cpp ; time ./a.out

real 0m1.959s

user 0m1.950s

sys 0m0.003s

$ g++ -O2 heap.cpp ; time ./a.out

real 0m2.373s

user 0m2.357s

sys 0m0.010s

$ g++ -O2 merge.cpp ; time ./a.out
real 0m1.384s

user 0m1.360s
sys 0m0.017s

As we could see both are quite good enough, at par with some O(n log n) sorting algorithms.

Just for comparison let's compare with O(n^2) sorting algorithms, with lower number of items:

// sel.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void selSort(int a[], size_t length) {
for(size_t z=0;z<length-1;++z) {
size_t best = z;
for(size_t y=z+1;y<length;++y) if(a[best]>a[y]) best = y;
if(z!=best) swap(a[best],a[z]);
}
}
const int N = 200000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
selSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// bub.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void bubSort(int a[], size_t length) {
for(size_t z=length-1;z>0;--z) {
for(size_t y=0;y<z;++y) if(a[y]>a[y+1]) swap(a[y],a[y+1]);
}
}
const int N = 200000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
bubSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void insSort(int a[], size_t length) {
for(size_t z=1;z<length;++z) {
int copy = a[z];
size_t y = z;
for(;y>0 && a[y-1]>copy;--y) a[y] = a[y-1];
a[y] = copy;
}
}
const int N = 200000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
insSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

// bin-ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
void binInsSort(int data[], int size) {
int index = -1,i = 1,j;
for(;i<size;i++) {
int temp = data[i];
int high = i,low = 0,mid;
while(low <= high) {
mid = (low + high) /2;
if(temp < data[mid]) {
high = mid - 1;
index = mid;
} else if (temp > data[mid]) {
low = mid + 1;
} else if(data[mid] == temp) {
index = mid;
break;
}
}
for(j = i;j > index;j--) data[j] = data[j-1];
data[j] = temp;
}
}
const int N = 200000;
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
binInsSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}

The benchmark result for 200K items:

$ g++ -O2 bub.cpp ; time ./a.out
real 0m54.837s
user 0m54.790s
sys 0m00.000s

$ g++ -O2 sel.cpp ; time ./a.out
real 0m51.671s
user 0m51.623s
sys 0m00.000s

$ g++ -O2 ins.cpp ; time ./a.out
real 0m4.513s
user 0m4.507s
sys 0m0.000s

$ g++ -O2 bin-ins.cpp ; time ./a.out
real 0m3.736s
user 0m3.730s
sys 0m0.000s

The benchmark for 500K items:

$ g++ -O2 ins.cpp ; time ./a.out
real 0m31.179s

user 0m31.137s

sys 0m00.003s

$ g++ -O2 bin-ins.cpp ; time ./a.out
real 0m24.109s
user 0m24.087s
sys 0m00.000s

Apparently I was wrong, till recently I thought that SelectionSort is faster than InsertionSort (because of lower number of write/swap), but now I know that it doesn't..

Programming Rants

2015-02-03

Old String CombSort Benchmark

2015-01-29

IntroSort vs CombSort vs ShellSort vs HeapSort vs MergeSort