Code:
jkb@seq3a[compress.../entropy_...] ./entropy_bench -b64 -i3 100m
entropy_benchmark 0.2 (64-bit Linux) (c) Dell Inc. Written by P.Skibinski
memcpy [chunk_size] [95 MB] 27 ms (3616 MB/s), 100000000, 28 ms (3487 MB/s)
FSE 2014-04-07 [64 KB] 427 ms (228 MB/s), 43042331, 297 ms (328 MB/s)
FSE 2014-04-07 [95 MB] 419 ms (233 MB/s), 43220627, 283 ms (345 MB/s)
ryg_rANS 2014-02-18 [64 KB] 655 ms (149 MB/s), 46070442, 907 ms (107 MB/s)
ryg_rANS 2014-02-18 [95 MB] 645 ms (151 MB/s), 43203696, 889 ms (109 MB/s)
ryg_rANS interleaved [64 KB] 571 ms (171 MB/s), 46075762, 557 ms (175 MB/s)
ryg_rANS interleaved [95 MB] 566 ms (172 MB/s), 43203696, 537 ms (181 MB/s)
ryg_rANS64 2014-02-18 [64 KB] 567 ms (172 MB/s), 46074280, 744 ms (131 MB/s)
ryg_rANS64 2014-02-18 [95 MB] 561 ms (174 MB/s), 43203688, 725 ms (134 MB/s)
ryg_rANS64 interleaved [64 KB] 484 ms (201 MB/s), 46083380, 436 ms (223 MB/s)
ryg_rANS64 interleaved [95 MB] 477 ms (204 MB/s), 43203696, 417 ms (234 MB/s)
ryg_rANS_SIMD [64 KB] 815 ms (119 MB/s), 46108652, 208 ms (469 MB/s)
ryg_rANS_SIMD [95 MB] 814 ms (119 MB/s), 43211504, 201 ms (485 MB/s)
tornado ArithCoder [64 KB] 1392 ms (70 MB/s), 50179429, 1795 ms (54 MB/s)
tornado ArithCoder [95 MB] 1160 ms (84 MB/s), 43263298, 1807 ms (54 MB/s)
tornado HuffCoder [64 KB] 793 ms (123 MB/s), 45212263, 845 ms (115 MB/s)
tornado HuffCoder [95 MB] 789 ms (123 MB/s), 43654432, 849 ms (115 MB/s)
tornado HuffCoder o1 [95 MB] 887 ms (110 MB/s), 34727880, 1478 ms (66 MB/s)
ryg_rANS+jkb-O0 [64 KB] 503 ms (194 MB/s), 43051785, 411 ms (237 MB/s)
ryg_rANS+jkb-O0 [95 MB] 513 ms (190 MB/s), 43216457, 408 ms (239 MB/s)
ryg_rANS+jkb-O1 [64 KB] 867 ms (112 MB/s), 35804852, 669 ms (145 MB/s)
ryg_rANS+jkb-O1 [95 MB] 777 ms (125 MB/s), 33092156, 588 ms (166 MB/s)
sh_arith+jkb-O0 [64 KB] 614 ms (159 MB/s), 43101134, 1001 ms (97 MB/s)
sh_arith+jkb-O0 [95 MB] 642 ms (152 MB/s), 43216872, 1004 ms (97 MB/s)
sh_arith+jkb-O1 [64 KB] 944 ms (103 MB/s), 35987822, 1263 ms (77 MB/s)
sh_arith+jkb-O1 [95 MB] 879 ms (111 MB/s), 33092667, 1198 ms (81 MB/s)
zlibh [64 KB] 449 ms (217 MB/s), 43445698, 482 ms (202 MB/s)
done... (3 iterations)
On this system (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz), it appears that further interleaving of the 32-bit ryg_rANS code still yields a considerable improvement.
Code:
*** entropy_bench.cpp~ Mon Apr 14 16:57:22 2014
--- entropy_bench.cpp Wed Apr 16 17:19:59 2014
***************
*** 30,39 ****
--- 30,43 ----
#include <algorithm> // std::sort
#include <vector>
#include <numeric>
+ #include <unistd.h>
+ #include <sys/time.h>
+ #include <sys/resource.h>
#include "zlibh/zlibh.h"
#include "fse/fse.h"
#include "tornado/tor_test.h"
#include "ryg_rans/ryg_rans.h"
+ #include "rANS_demo-3/rANS_static4.h"
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(WIN64) || defined(_WIN64)
*** tornado/EntropyCoder.cpp~ Thu Apr 10 17:30:32 2014
--- tornado/EntropyCoder.cpp Wed Apr 16 16:26:15 2014
***************
*** 1,3 ****
--- 1,11 ----
+ #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(WIN64) || defined(_WIN64)
+ # define WINDOWS
+ #endif
+
+ #ifndef WINDOWS
+ # define __cdecl
+ #endif
+
// Code for various streams and entropy codecs:
// - in/out byte streams
// - in/out bit streams