This is an old benchmark; you can find the updated results here.
programming: the action or process of writing computer programs. | rants: speak or shout at length in a wild, [im]passioned way.
2015-01-31
2015-01-29
IntroSort vs CombSort vs ShellSort vs HeapSort vs MergeSort
As we already know, GCC's std::sort uses IntroSort (a QuickSort variant that switches to HeapSort when the recursion depth gets too deep). We also know that CombSort is an improved version of BubbleSort, and ShellSort is an improved version of InsertionSort — but we often don't know how good they are in a real-life benchmark. So today I tried to compare them all (with HeapSort and in-place MergeSort added for good measure). My setup is 64-bit Linux 3.18.2-1, Intel i3-4150, 8 GB RAM, non-SSD disk; here's the code:
// intro.cpp — std::sort (GCC IntroSort) benchmark driver.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
const int N = 10000000;
// Fill N pseudo-random ints, sort them, then emit '!' for every
// adjacent pair that is out of order (sanity check of the result).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    sort(data, data + N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// comb.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Shrink the gap by the standard comb-sort factor (~1.3), clamped to
// a minimum of 1.  The 9/10 -> 11 adjustment is the classic
// "Combsort11" tweak.
int newGap(int gap){
    int next = static_cast<int>(gap / 1.3);
    if (next == 9 || next == 10) next = 11;
    return next < 1 ? 1 : next;
}
// Comb sort: gapped bubble passes with a shrinking gap, finishing
// with swap-free passes at gap 1.
void combSort(int a[], int len){
    int gap = len;
    bool dirty = true;
    while (dirty || gap > 1) {
        gap = newGap(gap);
        dirty = false;
        for (int i = 0; i + gap < len; ++i) {
            if (a[i] > a[i + gap]) {
                int t = a[i];
                a[i] = a[i + gap];
                a[i + gap] = t;
                dirty = true;
            }
        }
    }
}
const int N = 10000000;
// Driver: generate N pseudo-random ints, comb-sort them, then print
// a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    combSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// shell.cpp
#include<cstdlib>
#include<cstdio>
#include<deque>
#include<algorithm>
using namespace std;
// One pass of Shellsort: insertion sort over the interleaved
// subsequences a[k], a[k+gap], a[k+2*gap], ...
void shellSortPhase(int a[], int length, int gap) {
    for (int i = gap; i < length; ++i) {
        int value = a[i];
        int j = i - gap;
        for (; j >= 0 && a[j] > value; j -= gap) a[j + gap] = a[j];
        a[j + gap] = value;
    }
}
// Shellsort using Ciura's gap sequence, extended upward by halving
// for arrays larger than 2*701.  Gaps are processed from the back of
// the deque (largest) down to 1.
void shellSort(int a[], size_t length) {
    deque<int> gaps;
    // Extend the sequence: halve from length/2 down to (exclusive) 701.
    // NOTE(review): gaps are stored as int, so lengths > 2*INT_MAX
    // would narrow; the explicit cast documents the assumption.
    for (size_t z = length / 2; z > 701; z /= 2) gaps.push_front(static_cast<int>(z));
    static const int ciura[] = {701, 301, 132, 57, 23, 10, 4, 1};
    // Fix: use size_t for the index — the original compared a signed
    // int against the unsigned result of sizeof().
    for (size_t z = 0; z < sizeof(ciura) / sizeof(ciura[0]); ++z) gaps.push_front(ciura[z]);
    while (!gaps.empty()) {
        shellSortPhase(a, static_cast<int>(length), gaps.back());
        gaps.pop_back();
    }
}
const int N = 10000000;
// Driver: generate N pseudo-random ints, shell-sort them, then print
// a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    shellSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// heap.cpp — HeapSort via the standard library.
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<iostream>
using namespace std;
const int N = 10000000;
// Fill N pseudo-random ints, heapify, pop the heap into sorted order,
// then emit '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    make_heap(data, data + N);   // build a max-heap over the array
    sort_heap(data, data + N);   // repeated pops -> ascending order
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// merge.cpp
#include<vector>
#include<string>
#include<algorithm>
#include<cstdio>
using namespace std;
// Top-down recursive merge sort over [beg, end).  Merging is done via
// std::inplace_merge, so no scratch buffer is managed here.
void merge_sort( int* beg, int* end) {
    const ptrdiff_t n = end - beg;
    if (n <= 1) return;           // 0- or 1-element range: already sorted
    int* mid = beg + n / 2;
    merge_sort(beg, mid);         // sort the left half
    merge_sort(mid, end);         // sort the right half
    inplace_merge(beg, mid, end); // fuse the two sorted halves
}
const int N = 10000000;
// Driver: generate N pseudo-random ints, merge-sort them, then print
// a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    merge_sort(data, data + N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
The benchmark result for 10M items:
$ g++ intro.cpp ; time ./a.out
real 0m2.515s
user 0m2.503s
sys 0m0.007s
$ g++ comb.cpp ; time ./a.out
real 0m3.457s
user 0m3.443s
sys 0m0.007s
$ g++ shell.cpp ; time ./a.out
real 0m4.109s
user 0m4.103s
sys 0m0.003s
$ g++ -O2 intro.cpp ; time ./a.out
// intro.cpp
// NOTE(review): verbatim duplicate of the intro.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
const int N = 10000000;
// Fill N random ints, sort with std::sort, then print '!' for any
// adjacent out-of-order pair (sanity check).
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
sort(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// comb.cpp
// NOTE(review): verbatim duplicate of the comb.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Shrink the gap by ~1.3; the 9/10 -> 11 step is the "Combsort11" tweak.
int newGap(int gap){
gap /= 1.3;
if(gap == 9 || gap == 10) gap = 11;
if(gap < 1) return 1;
return gap;
}
// Comb sort: gapped bubble passes with a shrinking gap; terminates
// after a swap-free pass at gap 1.
void combSort(int a[], int len){
int gap = len;
bool swapped;
do {
swapped = false;
gap = newGap(gap);
for(int i=0; i < len-gap; ++i) {
if(a[i] > a[i+gap]) {
swapped = true;
swap(a[i], a[i+gap]);
}
}
} while(gap > 1 || swapped);
}
const int N = 10000000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
combSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// shell.cpp
// NOTE(review): verbatim duplicate of the shell.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<deque>
#include<algorithm>
using namespace std;
// One Shellsort pass: gapped insertion sort with the given gap.
void shellSortPhase(int a[], int length, int gap) {
int i;
for (i = gap; i < length; ++i) {
int value = a[i];
int j;
for (j = i - gap; j >= 0 && a[j] > value; j -= gap) a[j + gap] = a[j];
a[j + gap] = value;
}
}
// Shellsort with Ciura's gaps, extended by halving for large arrays.
// NOTE(review): the `int z < sizeof(...)` comparison below mixes
// signed and unsigned types (compiler warning territory).
void shellSort(int a[], size_t length) {
deque<int> gaps;
for(size_t z=length/2;z>701;z/=2) gaps.push_front(z);
static int cuira[] = {701,301,132,57,23,10,4,1};
for(int z=0;z<sizeof(cuira)/sizeof(int);++z) gaps.push_front(cuira[z]);
while(gaps.size()) {
shellSortPhase(a, length, gaps.back());
gaps.pop_back();
}
}
const int N = 10000000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
shellSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// heap.cpp
// NOTE(review): verbatim duplicate of the heap.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<algorithm>
#include<cstdio>
#include<cstdlib>
#include<iostream>
using namespace std;
const int N = 10000000;
// HeapSort via the standard library: heapify then pop repeatedly;
// '!' printed for any inversion left in the result.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
make_heap(arr,arr+N);
sort_heap(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// merge.cpp
// NOTE(review): verbatim duplicate of the merge.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<vector>
#include<string>
#include<algorithm>
#include<cstdio>
using namespace std;
// Top-down recursive merge sort over [beg, end) using inplace_merge.
void merge_sort( int* beg, int* end) {
if (end - beg > 1) {
int* mid = beg + (end - beg) / 2;
merge_sort(beg, mid);
merge_sort(mid, end);
inplace_merge(beg, mid, end);
}
}
const int N = 10000000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
merge_sort(arr,arr+N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
The benchmark result for 10M items:
$ g++ intro.cpp ; time ./a.out
real 0m2.515s
user 0m2.503s
sys 0m0.007s
$ g++ comb.cpp ; time ./a.out
real 0m3.457s
user 0m3.443s
sys 0m0.007s
$ g++ shell.cpp ; time ./a.out
real 0m4.109s
user 0m4.103s
sys 0m0.003s
$ g++ heap.cpp ; time ./a.out
real 0m4.478s
user 0m4.470s
sys 0m0.003s
$ g++ merge.cpp ; time ./a.out
real 0m3.748s
user 0m3.743s
sys 0m0.007s
Result with -O2 flag:
$ g++ -O2 intro.cpp ; time ./a.out
real 0m0.806s
user 0m0.797s
sys 0m0.007s
$ g++ -O2 comb.cpp ; time ./a.out
real 0m1.421s
user 0m1.417s
sys 0m0.000s
$ g++ -O2 shell.cpp ; time ./a.out
real 0m1.959s
user 0m1.950s
sys 0m0.003s
$ g++ -O2 heap.cpp ; time ./a.out
real 0m2.373s
user 0m2.357s
sys 0m0.010s
$ g++ -O2 merge.cpp ; time ./a.out
real 0m1.384s
user 0m1.360s
sys 0m0.017s
As we can see, both improved algorithms (CombSort and ShellSort) are quite good, on par with the O(n log n) sorting algorithms.
Just for comparison let's compare with O(n^2) sorting algorithms, with lower number of items:
// sel.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Selection sort: for each position z, find the smallest element of
// the unsorted suffix and swap it into place.  O(n^2) comparisons,
// at most n-1 swaps.
void selSort(int a[], size_t length) {
    // Fix: guard with z + 1 < length rather than z < length - 1.
    // With size_t, length - 1 underflows to SIZE_MAX when length == 0,
    // so the original loop ran (and read a[0]) on empty input.
    for (size_t z = 0; z + 1 < length; ++z) {
        size_t best = z;
        for (size_t y = z + 1; y < length; ++y)
            if (a[best] > a[y]) best = y;
        if (z != best) swap(a[best], a[z]);
    }
}
const int N = 200000;
// Driver: generate N pseudo-random ints, selection-sort them, then
// print a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    selSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// bub.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Bubble sort: each pass bubbles the largest remaining element up to
// index z; elements past z are already in their final position.
void bubSort(int a[], size_t length) {
    // Fix: bail out on empty input.  With size_t, length - 1
    // underflows to SIZE_MAX when length == 0, so the original outer
    // loop started at a huge index and the inner pass read far past
    // the array.
    if (length == 0) return;
    for (size_t z = length - 1; z > 0; --z) {
        for (size_t y = 0; y < z; ++y)
            if (a[y] > a[y + 1]) swap(a[y], a[y + 1]);
    }
}
const int N = 200000;
// Driver: generate N pseudo-random ints, bubble-sort them, then print
// a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    bubSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Insertion sort: grow a sorted prefix one element at a time,
// inserting a[z] by shifting larger elements one slot to the right.
void insSort(int a[], size_t length) {
    for (size_t z = 1; z < length; ++z) {
        const int value = a[z];
        size_t pos = z;
        while (pos > 0 && a[pos - 1] > value) {
            a[pos] = a[pos - 1];
            --pos;
        }
        a[pos] = value;
    }
}
const int N = 200000;
// Driver: generate N pseudo-random ints, insertion-sort them, then
// print a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    insSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
// bin-ins.cpp
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Binary insertion sort: like insertion sort, but the insertion point
// in the sorted prefix is found by binary search.  Comparisons drop
// to O(n log n); element moves remain O(n^2).
void binInsSort(int data[], int size) {
    for (int i = 1; i < size; ++i) {
        int temp = data[i];
        // First position in the sorted prefix [0, i) holding a value
        // greater than temp.  Using std::upper_bound replaces the
        // original hand-rolled search, which carried a stale `index`
        // across outer iterations and placed equal elements before
        // their duplicates (unstable); upper_bound is stable.
        int pos = static_cast<int>(upper_bound(data, data + i, temp) - data);
        // Shift [pos, i) one slot right and drop temp into place.
        for (int j = i; j > pos; --j) data[j] = data[j - 1];
        data[pos] = temp;
    }
}
const int N = 200000;
// Driver: generate N pseudo-random ints, binary-insertion-sort them,
// then print a '!' for every adjacent out-of-order pair (sanity check).
int main() {
    int* data = new int[N];
    for (int i = 0; i < N; ++i) data[i] = rand();
    binInsSort(data, N);
    for (int i = 1; i < N; ++i) if (data[i] < data[i - 1]) printf("!");
    delete[] data;
}
$ g++ -O2 bub.cpp ; time ./a.out
real 0m54.837s
user 0m54.790s
sys 0m00.000s
$ g++ -O2 sel.cpp ; time ./a.out
real 0m51.671s
user 0m51.623s
sys 0m00.000s
$ g++ -O2 ins.cpp ; time ./a.out
real 0m4.513s
user 0m4.507s
sys 0m0.000s
$ g++ -O2 bin-ins.cpp ; time ./a.out
real 0m3.736s
user 0m3.730s
sys 0m0.000s
Just for comparison let's compare with O(n^2) sorting algorithms, with lower number of items:
// sel.cpp
// NOTE(review): verbatim duplicate of the sel.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Selection sort: swap the minimum of the unsorted suffix into place.
// NOTE(review): with size_t, length - 1 underflows when length == 0,
// so this loop misbehaves on empty input.
void selSort(int a[], size_t length) {
for(size_t z=0;z<length-1;++z) {
size_t best = z;
for(size_t y=z+1;y<length;++y) if(a[best]>a[y]) best = y;
if(z!=best) swap(a[best],a[z]);
}
}
const int N = 200000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
selSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// bub.cpp
// NOTE(review): verbatim duplicate of the bub.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Bubble sort: each pass bubbles the largest remaining element to z.
// NOTE(review): with size_t, length - 1 underflows when length == 0,
// so this loop misbehaves on empty input.
void bubSort(int a[], size_t length) {
for(size_t z=length-1;z>0;--z) {
for(size_t y=0;y<z;++y) if(a[y]>a[y+1]) swap(a[y],a[y+1]);
}
}
const int N = 200000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
bubSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// ins.cpp
// NOTE(review): verbatim duplicate of the ins.cpp listing earlier in
// this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Insertion sort: grow a sorted prefix, shifting larger elements
// right to make room for a[z].
void insSort(int a[], size_t length) {
for(size_t z=1;z<length;++z) {
int copy = a[z];
size_t y = z;
for(;y>0 && a[y-1]>copy;--y) a[y] = a[y-1];
a[y] = copy;
}
}
const int N = 200000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
insSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
// bin-ins.cpp
// NOTE(review): verbatim duplicate of the bin-ins.cpp listing earlier
// in this post (looks like a publishing/scrape artifact); kept as-is.
#include<cstdlib>
#include<cstdio>
#include<algorithm>
using namespace std;
// Binary insertion sort: binary-search the sorted prefix [0, i] for
// the insertion point, then shift and insert.
// NOTE(review): `index` is declared once outside the outer loop and
// can carry a value between iterations — fragile, though the search
// always reassigns it before the shift because data[i] (== temp) is
// inside the searched range.
void binInsSort(int data[], int size) {
int index = -1,i = 1,j;
for(;i<size;i++) {
int temp = data[i];
int high = i,low = 0,mid;
while(low <= high) {
mid = (low + high) /2;
if(temp < data[mid]) {
high = mid - 1;
index = mid;
} else if (temp > data[mid]) {
low = mid + 1;
} else if(data[mid] == temp) {
index = mid;
break;
}
}
for(j = i;j > index;j--) data[j] = data[j-1];
data[j] = temp;
}
}
const int N = 200000;
// Driver: random fill, sort, '!' printed for any inversion.
int main() {
int* arr = new int[N];
for(int z=0;z<N;++z) arr[z] = rand();
binInsSort(arr,N);
for(int z=1;z<N;++z) if(arr[z]<arr[z-1]) printf("!");
delete[] arr;
}
The benchmark result for 200K items:
$ g++ -O2 bub.cpp ; time ./a.out
real 0m54.837s
user 0m54.790s
sys 0m00.000s
$ g++ -O2 sel.cpp ; time ./a.out
real 0m51.671s
user 0m51.623s
sys 0m00.000s
$ g++ -O2 ins.cpp ; time ./a.out
real 0m4.513s
user 0m4.507s
sys 0m0.000s
$ g++ -O2 bin-ins.cpp ; time ./a.out
real 0m3.736s
user 0m3.730s
sys 0m0.000s
The benchmark for 500K items:
$ g++ -O2 ins.cpp ; time ./a.out
real 0m31.179s
user 0m31.137s
sys 0m00.003s
$ g++ -O2 bin-ins.cpp ; time ./a.out
real 0m24.109s
user 0m24.087s
sys 0m00.000s
Apparently I was wrong: until recently I thought that SelectionSort was faster than InsertionSort (because of its lower number of writes/swaps), but now I know that it isn't.
2015-01-27
Vivaldi: New Chromium-based Web Browser
So today a new web browser was released — another Chromium-based one — named Vivaldi. The review? First thing: Ctrl+Enter doesn't work; in other browsers it usually appends ".com" and navigates. Ctrl+L (location bar), Ctrl+H (history), and Ctrl+J (downloads) work fine. Opening many tabs lags a lot, maybe because of their skin. You can install it on Windows, Mac, Ubuntu, Fedora, and also on ArchLinux using this command:
yaourt -Sy vivaldi
So, how about their javascript performance? Here's the result of SunSpider benchmark
Vivaldi 1.0.83.38 (Developer Build) stable
============================================ RESULTS (means and 95% confidence intervals) -------------------------------------------- Total: 228.3ms +/- 1.3% -------------------------------------------- 3d: 43.1ms +/- 3.1% cube: 13.0ms +/- 4.5% morph: 17.5ms +/- 2.9% raytrace: 12.6ms +/- 4.0% access: 17.6ms +/- 2.8% binary-trees: 2.0ms +/- 0.0% fannkuch: 7.9ms +/- 5.1% nbody: 4.0ms +/- 0.0% nsieve: 3.7ms +/- 9.3% bitops: 13.2ms +/- 3.4% 3bit-bits-in-byte: 1.2ms +/- 25.1% bits-in-byte: 3.0ms +/- 0.0% bitwise-and: 4.1ms +/- 5.5% nsieve-bits: 4.9ms +/- 4.6% controlflow: 2.3ms +/- 15.0% recursive: 2.3ms +/- 15.0% crypto: 16.7ms +/- 4.1% aes: 6.5ms +/- 5.8% md5: 5.2ms +/- 8.7% sha1: 5.0ms +/- 0.0% date: 23.3ms +/- 3.2% format-tofte: 12.1ms +/- 1.9% format-xparb: 11.2ms +/- 6.6% math: 23.2ms +/- 3.8% cordic: 4.9ms +/- 4.6% partial-sums: 14.9ms +/- 1.5% spectral-norm: 3.4ms +/- 20.3% regexp: 8.1ms +/- 2.8% dna: 8.1ms +/- 2.8% string: 80.8ms +/- 1.8% base64: 8.4ms +/- 4.4% fasta: 12.2ms +/- 2.5% tagcloud: 26.8ms +/- 2.8% unpack-code: 23.2ms +/- 3.2% validate-input: 10.2ms +/- 3.0%Firefox 35.0
============================================ RESULTS (means and 95% confidence intervals) -------------------------------------------- Total: 195.3ms +/- 1.3% -------------------------------------------- 3d: 32.0ms +/- 3.2% cube: 12.1ms +/- 5.9% morph: 6.6ms +/- 5.6% raytrace: 13.3ms +/- 3.6% access: 19.0ms +/- 6.4% binary-trees: 3.9ms +/- 10.4% fannkuch: 7.8ms +/- 8.4% nbody: 3.6ms +/- 10.3% nsieve: 3.7ms +/- 13.0% bitops: 11.4ms +/- 3.2% 3bit-bits-in-byte: 1.1ms +/- 20.5% bits-in-byte: 2.7ms +/- 12.8% bitwise-and: 2.5ms +/- 15.1% nsieve-bits: 5.1ms +/- 4.4% controlflow: 2.8ms +/- 10.8% recursive: 2.8ms +/- 10.8% crypto: 19.2ms +/- 3.4% aes: 9.6ms +/- 5.2% md5: 4.9ms +/- 4.6% sha1: 4.7ms +/- 7.3% date: 22.2ms +/- 3.0% format-tofte: 9.8ms +/- 3.1% format-xparb: 12.4ms +/- 4.0% math: 15.5ms +/- 2.4% cordic: 3.1ms +/- 7.3% partial-sums: 10.2ms +/- 3.0% spectral-norm: 2.2ms +/- 13.7% regexp: 9.0ms +/- 3.7% dna: 9.0ms +/- 3.7% string: 64.2ms +/- 2.6% base64: 8.0ms +/- 9.4% fasta: 8.6ms +/- 4.3% tagcloud: 17.2ms +/- 3.3% unpack-code: 21.5ms +/- 3.6% validate-input: 8.9ms +/- 8.0%Opera 26.0.1656.60
============================================ RESULTS (means and 95% confidence intervals) -------------------------------------------- Total: 235.4ms +/- 1.6% -------------------------------------------- 3d: 46.4ms +/- 7.3% cube: 13.7ms +/- 4.3% morph: 19.7ms +/- 12.7% raytrace: 13.0ms +/- 6.9% access: 17.9ms +/- 6.1% binary-trees: 1.8ms +/- 16.7% fannkuch: 8.2ms +/- 5.5% nbody: 4.0ms +/- 11.9% nsieve: 3.9ms +/- 10.4% bitops: 12.7ms +/- 4.6% 3bit-bits-in-byte: 1.3ms +/- 26.6% bits-in-byte: 3.0ms +/- 0.0% bitwise-and: 4.1ms +/- 5.5% nsieve-bits: 4.3ms +/- 8.0% controlflow: 2.1ms +/- 10.8% recursive: 2.1ms +/- 10.8% crypto: 17.7ms +/- 6.6% aes: 6.9ms +/- 7.6% md5: 5.4ms +/- 14.2% sha1: 5.4ms +/- 6.8% date: 24.7ms +/- 3.6% format-tofte: 12.7ms +/- 2.7% format-xparb: 12.0ms +/- 7.4% math: 22.8ms +/- 2.9% cordic: 5.0ms +/- 9.5% partial-sums: 15.0ms +/- 3.9% spectral-norm: 2.8ms +/- 10.8% regexp: 8.0ms +/- 4.2% dna: 8.0ms +/- 4.2% string: 83.1ms +/- 1.4% base64: 8.8ms +/- 10.0% fasta: 12.7ms +/- 3.8% tagcloud: 28.5ms +/- 3.4% unpack-code: 23.1ms +/- 3.1% validate-input: 10.0ms +/- 0.0%Chromium 39.0.2171.99 (64-bit)
============================================ RESULTS (means and 95% confidence intervals) -------------------------------------------- Total: 242.2ms +/- 2.2% -------------------------------------------- 3d: 46.6ms +/- 8.8% cube: 13.5ms +/- 5.1% morph: 20.7ms +/- 16.3% raytrace: 12.4ms +/- 5.6% access: 18.5ms +/- 4.2% binary-trees: 2.2ms +/- 13.7% fannkuch: 8.1ms +/- 6.5% nbody: 4.2ms +/- 7.2% nsieve: 4.0ms +/- 8.4% bitops: 13.0ms +/- 3.7% 3bit-bits-in-byte: 1.5ms +/- 25.1% bits-in-byte: 2.9ms +/- 7.8% bitwise-and: 3.9ms +/- 5.8% nsieve-bits: 4.7ms +/- 7.3% controlflow: 2.0ms +/- 0.0% recursive: 2.0ms +/- 0.0% crypto: 17.8ms +/- 5.9% aes: 6.8ms +/- 8.3% md5: 5.4ms +/- 9.3% sha1: 5.6ms +/- 17.2% date: 22.4ms +/- 2.2% format-tofte: 11.8ms +/- 5.6% format-xparb: 10.6ms +/- 4.7% math: 23.6ms +/- 2.6% cordic: 4.9ms +/- 4.6% partial-sums: 15.4ms +/- 3.2% spectral-norm: 3.3ms +/- 14.6% regexp: 8.0ms +/- 0.0% dna: 8.0ms +/- 0.0% string: 90.3ms +/- 4.8% base64: 8.5ms +/- 12.1% fasta: 11.2ms +/- 5.0% tagcloud: 34.8ms +/- 4.9% unpack-code: 26.0ms +/- 18.5% validate-input: 9.8ms +/- 5.8%
JavaScript speed in the latest browsers doesn't really matter much anymore; they are all already blazing fast.
2015-01-26
PHP/HHVM vs Go vs NodeJs BubbleSort Benchmark
So, I wondered how good HHVM is now; the code was taken from this post. My setup is 64-bit Linux 3.18.2-1, AMD A8-6600K, 16 GB RAM, non-SSD disk. Here's the best result of each implementation:
PHP 5.6.4
$ time php bench.php
Array
(
[0] => 1
[1] => 1
[2] => 2
[3] => 3
[4] => 3
[5] => 4
[6] => 5
[7] => 12
[8] => 52
[9] => 92
[10] => 424
[11] => 4124
)
2.3711581230164
real 0m2.383s
user 0m2.373s
sys 0m0.010s
HHVM 3.5.0
$ time hhvm -v Eval.Jit=true bench.php
Array
(
[0] => 1
[1] => 1
[2] => 2
[3] => 3
[4] => 3
[5] => 4
[6] => 5
[7] => 12
[8] => 52
[9] => 92
[10] => 424
[11] => 4124
)
0.13958597183228
real 0m0.414s
user 0m0.247s
sys 0m0.033s
PHP 5.6.4
$ time php bench.php
Array
(
[0] => 1
[1] => 1
[2] => 2
[3] => 3
[4] => 3
[5] => 4
[6] => 5
[7] => 12
[8] => 52
[9] => 92
[10] => 424
[11] => 4124
)
2.3711581230164
real 0m2.383s
user 0m2.373s
sys 0m0.010s
HHVM 3.5.0
Array
(
[0] => 1
[1] => 1
[2] => 2
[3] => 3
[4] => 3
[5] => 4
[6] => 5
[7] => 12
[8] => 52
[9] => 92
[10] => 424
[11] => 4124
)
0.13958597183228
real 0m0.414s
user 0m0.247s
sys 0m0.033s
Go 1.4.1
And yes, I also include the compile time, for fairness.
$ rm bench; time(go build bench.go && ./bench)
removed ‘bench’
[1 1 2 3 3 4 5 12 52 92 424 4124]
23.027575ms
real 0m0.191s
user 0m0.150s
sys 0m0.033s
$ time ./bench
[1 1 2 3 3 4 5 12 52 92 424 4124]
22.757741ms
real 0m0.024s
user 0m0.023s
sys 0m0.000s
NodeJS 0.10.35
$ time node bench.js
[ 1, 1, 2, 3, 3, 4, 5, 12, 52, 92, 424, 4124 ]
42
real 0m0.065s
user 0m0.060s
sys 0m0.003s
From what we could see (run duration - actual duration):
Go: 22ms - 24ms
NodeJS: 42ms - 65ms
Go (with compile duration included): 23ms - 191ms
HHVM: 139ms - 414ms
PHP: 2371ms - 2383ms
From what we could see (run duration - actual duration):
Go: 22ms - 24ms
NodeJS: 42ms - 65ms
Go (with compile duration included): 23ms - 191ms
HHVM: 139ms - 414ms
PHP: 2371ms - 2383ms
2015-01-21
Ruby 2.1.5 vs 2.2.0 Performance Benchmark
Ruby 2.2.0 released last christmas (yea, I give you my heart~), and I ain't got any news about it (thanks to Google Alert) -_-|||, ok then let's do the benchmark, yay! :3
So, as we could see, ruby-2.2.0 does bring new performance increase, especially in Socket and GCs.
Benchmark File | Input Size | ruby-2.1.5 p273 | ruby-2.2.0 p0 | Performance Increase |
macro-benchmarks/bm_gzip.rb | 100 | 5.441800787 | 5.529919291 | -1.62% |
macro-benchmarks/bm_hilbert_matrix.rb | 10 | 0.001506644 | 0.001508498 | -0.12% |
macro-benchmarks/bm_hilbert_matrix.rb | 20 | 0.018034514 | 0.019916750 | -10.44% |
macro-benchmarks/bm_hilbert_matrix.rb | 30 | 0.084065866 | 0.088421611 | -5.18% |
macro-benchmarks/bm_hilbert_matrix.rb | 40 | 0.243368739 | 0.257345416 | -5.74% |
macro-benchmarks/bm_hilbert_matrix.rb | 50 | 0.656672931 | 0.665158535 | -1.29% |
macro-benchmarks/bm_hilbert_matrix.rb | 60 | 1.483352925 | 1.499879964 | -1.11% |
macro-benchmarks/bm_mpart.rb | 300 | 0.021045194 | 0.018580623 | 11.71% |
macro-benchmarks/bm_norvig_spelling.rb | 50 | 4.105601359 | 3.838181519 | 6.51% |
macro-benchmarks/bm_parse_log.rb | 100 | 0.524311824 | 0.534461600 | -1.94% |
macro-benchmarks/bm_rcs.rb | 100 | 0.167349193 | 0.167786871 | -0.26% |
macro-benchmarks/bm_sudoku.rb | 1 | 1.378527192 | 1.309769003 | 4.99% |
micro-benchmarks/bm_app_factorial.rb | 5000 | 0.018377852 | 0.016312503 | 11.24% |
micro-benchmarks/bm_app_fib.rb | 30 | 0.154745789 | 0.145133262 | 6.21% |
micro-benchmarks/bm_app_fib.rb | 35 | 1.723013699 | 1.651199102 | 4.17% |
micro-benchmarks/bm_app_mandelbrot.rb | 1 | 0.150893967 | 0.148029544 | 1.90% |
micro-benchmarks/bm_app_pentomino.rb | 1 | 20.916050147 | 20.219321355 | 3.33% |
micro-benchmarks/bm_app_tak.rb | 7 | 0.114274899 | 0.108184189 | 5.33% |
micro-benchmarks/bm_app_tak.rb | 8 | 0.332602074 | 0.311949329 | 6.21% |
micro-benchmarks/bm_app_tak.rb | 9 | 0.892592296 | 0.813961797 | 8.81% |
micro-benchmarks/bm_app_tarai.rb | 3 | 0.418669632 | 0.377954191 | 9.72% |
micro-benchmarks/bm_app_tarai.rb | 4 | 0.501545550 | 0.446574377 | 10.96% |
micro-benchmarks/bm_app_tarai.rb | 5 | 0.570404066 | 0.546178530 | 4.25% |
micro-benchmarks/bm_binary_trees.rb | 1 | 7.609491917 | 7.742826158 | -1.75% |
micro-benchmarks/bm_cal.rb | 500 | 0.041128382 | 0.040977642 | 0.37% |
micro-benchmarks/bm_count_multithreaded.rb | 1 | 0.004324664 | 0.005010469 | -15.86% |
micro-benchmarks/bm_count_multithreaded.rb | 2 | 0.009148898 | 0.010368676 | -13.33% |
micro-benchmarks/bm_count_multithreaded.rb | 4 | 0.021764523 | 0.019279018 | 11.42% |
micro-benchmarks/bm_count_multithreaded.rb | 8 | 0.045679936 | 0.041301824 | 9.58% |
micro-benchmarks/bm_count_multithreaded.rb | 16 | 0.079268071 | 0.091182084 | -15.03% |
micro-benchmarks/bm_count_shared_thread.rb | 1 | 0.040428474 | 0.042236155 | -4.47% |
micro-benchmarks/bm_count_shared_thread.rb | 2 | 0.045402622 | 0.043319078 | 4.59% |
micro-benchmarks/bm_count_shared_thread.rb | 4 | 0.046338546 | 0.045935778 | 0.87% |
micro-benchmarks/bm_count_shared_thread.rb | 8 | 0.059489594 | 0.052797231 | 11.25% |
micro-benchmarks/bm_count_shared_thread.rb | 16 | 0.055508897 | 0.062376136 | -12.37% |
micro-benchmarks/bm_dirp.rb | 10000 | 1.041128339 | 1.069316017 | -2.71% |
micro-benchmarks/bm_eval.rb | 1000000 | 6.485932626 | 6.937864716 | -6.97% |
micro-benchmarks/bm_fannkuch.rb | 6 | 0.001213466 | 0.001289357 | -6.25% |
micro-benchmarks/bm_fannkuch.rb | 8 | 0.091404702 | 0.095585812 | -4.57% |
micro-benchmarks/bm_fannkuch.rb | 10 | 11.697475867 | 12.100328001 | -3.44% |
micro-benchmarks/bm_fasta.rb | 1000000 | 8.576651335 | 8.308239976 | 3.13% |
micro-benchmarks/bm_ffi_printf.rb | 100000 | 0.516967724 | LoadError | |
micro-benchmarks/bm_fiber_ring.rb | 10 | 0.000308344 | 0.000332843 | -7.95% |
micro-benchmarks/bm_fiber_ring.rb | 100 | 0.013368638 | 0.011803480 | 11.71% |
micro-benchmarks/bm_fiber_ring.rb | 1000 | 1.942049853 | 1.503775298 | 22.57% |
micro-benchmarks/bm_fractal.rb | 5 | 0.589704152 | 0.539340453 | 8.54% |
micro-benchmarks/bm_gc_array.rb | 1 | 13.646902038 | 11.945846656 | 12.46% |
micro-benchmarks/bm_gc_mb.rb | 500000 | 0.138416534 | 0.099156060 | 28.36% |
micro-benchmarks/bm_gc_mb.rb | 1000000 | 0.245671563 | 0.181434410 | 26.15% |
micro-benchmarks/bm_gc_mb.rb | 3000000 | 0.768378183 | 0.668100862 | 13.05% |
micro-benchmarks/bm_gc_string.rb | 1 | 2.618494607 | 2.556772318 | 2.36% |
micro-benchmarks/bm_knucleotide.rb | 1 | 0.523677831 | 0.517581241 | 1.16% |
micro-benchmarks/bm_list.rb | 1000 | 0.024172078 | 0.019861599 | 17.83% |
micro-benchmarks/bm_list.rb | 10000 | 0.906083771 | 0.869684240 | 4.02% |
micro-benchmarks/bm_lucas_lehmer.rb | 9689 | 0.523225807 | 0.518668121 | 0.87% |
micro-benchmarks/bm_lucas_lehmer.rb | 9941 | 0.562339636 | 0.553851605 | 1.51% |
micro-benchmarks/bm_lucas_lehmer.rb | 11213 | 0.750138474 | 0.736377808 | 1.83% |
micro-benchmarks/bm_lucas_lehmer.rb | 19937 | 2.921622909 | 2.917808062 | 0.13% |
micro-benchmarks/bm_mandelbrot.rb | 1 | 7.568357616 | 7.282273656 | 3.78% |
micro-benchmarks/bm_mbari_bogus1.rb | 1 | 0.019700559 | 0.020653362 | -4.84% |
micro-benchmarks/bm_mergesort.rb | 1 | 0.429642351 | 0.404129411 | 5.94% |
micro-benchmarks/bm_mergesort_hongli.rb | 3000 | 0.676850926 | 0.673826003 | 0.45% |
micro-benchmarks/bm_meteor_contest.rb | 1 | 3.805291502 | 3.630069959 | 4.60% |
micro-benchmarks/bm_monte_carlo_pi.rb | 10000000 | 2.359449302 | 2.196967268 | 6.89% |
micro-benchmarks/bm_nbody.rb | 100000 | 0.898108469 | 0.897828268 | 0.03% |
micro-benchmarks/bm_nsieve.rb | 9 | 2.177539100 | 2.056355906 | 5.57% |
micro-benchmarks/bm_observ.rb | 100000 | 0.300629705 | 0.268134252 | 10.81% |
micro-benchmarks/bm_open_many_files.rb | 50000 | 0.234240497 | 0.200070046 | 14.59% |
micro-benchmarks/bm_partial_sums.rb | 2500000 | 3.175624789 | 2.966665119 | 6.58% |
micro-benchmarks/bm_pathname.rb | 100 | 28.866053656 | 29.000798768 | -0.47% |
micro-benchmarks/bm_pi.rb | 1000 | 0.031816670 | 0.028837649 | 9.36% |
micro-benchmarks/bm_pi.rb | 10000 | 2.793874224 | 2.728208813 | 2.35% |
micro-benchmarks/bm_primes.rb | 3000 | 0.000884100 | 0.000834377 | 5.62% |
micro-benchmarks/bm_primes.rb | 30000 | 0.007950027 | 0.008346128 | -4.98% |
micro-benchmarks/bm_primes.rb | 300000 | 0.077019855 | 0.078940931 | -2.49% |
micro-benchmarks/bm_primes.rb | 3000000 | 0.774730695 | 0.765232657 | 1.23% |
micro-benchmarks/bm_quicksort.rb | 1 | 0.975718723 | 1.005529795 | -3.06% |
micro-benchmarks/bm_read_large.rb | 100 | 1.445822904 | 1.433691285 | 0.84% |
micro-benchmarks/bm_regex_dna.rb | 20 | 1.664740585 | 1.492710180 | 10.33% |
micro-benchmarks/bm_reverse_complement.rb | 1 | 2.260494686 | 2.219066674 | 1.83% |
micro-benchmarks/bm_simple_connect.rb | 1 | 0.000132251 | 0.000116141 | 12.18% |
micro-benchmarks/bm_simple_connect.rb | 100 | 0.003311302 | 0.003090082 | 6.68% |
micro-benchmarks/bm_simple_connect.rb | 500 | 0.016338940 | 0.015020540 | 8.07% |
micro-benchmarks/bm_simple_server.rb | 1 | 0.000133503 | 0.000155542 | -16.51% |
micro-benchmarks/bm_simple_server.rb | 100 | 0.000969707 | 0.000846978 | 12.66% |
micro-benchmarks/bm_simple_server.rb | 100000 | 0.729301984 | 0.658444843 | 9.72% |
micro-benchmarks/bm_so_ackermann.rb | 7 | 0.040541865 | 0.038593147 | 4.81% |
micro-benchmarks/bm_so_ackermann.rb | 9 | 0.649686458 | 0.608577279 | 6.33% |
micro-benchmarks/bm_so_array.rb | 9000 | 0.982821222 | 0.947042115 | 3.64% |
micro-benchmarks/bm_so_count_words.rb | 100 | 1.870193048 | 1.748334946 | 6.52% |
micro-benchmarks/bm_so_exception.rb | 500000 | 1.238312823 | 1.149542351 | 7.17% |
micro-benchmarks/bm_so_lists.rb | 1000 | 2.198686097 | 2.114709163 | 3.82% |
micro-benchmarks/bm_so_lists_small.rb | 1000 | 0.453313595 | 0.432864767 | 4.51% |
micro-benchmarks/bm_so_matrix.rb | 60 | 0.310554994 | 0.308662303 | 0.61% |
micro-benchmarks/bm_so_object.rb | 500000 | 0.244054583 | 0.254701496 | -4.36% |
micro-benchmarks/bm_so_object.rb | 1000000 | 0.510160533 | 0.523120809 | -2.54% |
micro-benchmarks/bm_so_object.rb | 1500000 | 0.786008195 | 0.787754481 | -0.22% |
micro-benchmarks/bm_so_sieve.rb | 4000 | 4.926280142 | 4.693643955 | 4.72% |
micro-benchmarks/bm_socket_transfer_1mb.rb | 10000 | 0.138346571 | 0.077399967 | 44.05% |
micro-benchmarks/bm_socket_transfer_1mb.rb | 1000000 | 0.138263657 | 0.078234789 | 43.42% |
micro-benchmarks/bm_socket_transfer_1mb_noblock.rb | 10000 | 0.144179379 | 0.078645168 | 45.45% |
micro-benchmarks/bm_socket_transfer_1mb_noblock.rb | 1000000 | 0.136524679 | 0.076419931 | 44.02% |
micro-benchmarks/bm_spectral_norm.rb | 100 | 0.087557986 | 0.086901845 | 0.75% |
micro-benchmarks/bm_string_concat.rb | 10000000 | 1.572824759 | 1.396739485 | 11.20% |
micro-benchmarks/bm_sum_file.rb | 100 | 2.483852444 | 2.512043986 | -1.13% |
micro-benchmarks/bm_word_anagrams.rb | 1 | 2.299233762 | 2.214820932 | 3.67% |
micro-benchmarks/bm_write_large.rb | 100 | 0.094776340 | 0.085387468 | 9.91% |
rdoc/bm_rdoc_against_itself_darkfish.rb | 1 | 5.156284378 | 4.492923306 | 12.87% |
rdoc/bm_rdoc_against_itself_ri.rb | 1 | 4.397700124 | 4.010792575 | 8.80% |
So, as we could see, ruby-2.2.0 does bring new performance increase, especially in Socket and GCs.
Subscribe to:
Posts
(
Atom
)