
Tobias Schwinger <tschwinger <at> isonews2.com> writes:
We actually very much want a preprocessor-generated 'switch' statement because it is a special hint for optimization and most compilers generate very efficient code for it...
BTW, switch_ doesn't implement fall-though and I was worried about performance of this important case (bzero with Duff's device optimization): switch(n % 8) { case 7: buf[6] = 0; case 6: buf[5] = 0; case 5: buf[4] = 0; case 4: buf[3] = 0; case 3: buf[2] = 0; case 2: buf[1] = 0; case 1: buf[0] = 0; } switch_ would generate this code: switch(n % 8) { case 7: buf[6] = 0; buf[5] = 0; buf[4] = 0; buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0; break; case 6: buf[5] = 0; buf[4] = 0; buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0; break; case 5: buf[4] = 0; buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0; break; case 4: buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0; break; case 3: buf[2] = 0; buf[1] = 0; buf[0] = 0; break; case 2: buf[1] = 0; buf[0] = 0; break; case 1: buf[0] = 0; break; default: break; } Below is a program that demonstates a difference of assembly code between hand- crafted switch and the switch_. The are identical on gcc 3.4.6 x86_64. #include <iostream> #include "switch.hpp" #include <boost/mpl/integral_c.hpp> #include <boost/mpl/range_c.hpp> #include <boost/mpl/vector.hpp> void classic_duff(char* buf, int n) { switch(n % 8) { case 7: buf[6] = 0; case 6: buf[5] = 0; case 5: buf[4] = 0; case 4: buf[3] = 0; case 3: buf[2] = 0; case 2: buf[1] = 0; case 1: buf[0] = 0; } // ... } template<int N> struct duff_step; template<int N> struct duff_step { static void step(char* buf) { buf[N-1] = 0; duff_step<N-1>::step(buf); } }; template<> struct duff_step<0> { static void step(char* buf) { } }; struct duff_case { char* buf; duff_case(char* buf) : buf(buf) {} typedef void result_type; template<class Case> void operator()(Case) const { duff_step<Case::value>::step(buf); } }; struct ignore { template<class Int> void operator()(Int) const {} }; template<int Mod> void modern_duff(char* buf, int n) { using namespace boost; switch_< mpl::range_c<int,0,Mod> >(n % Mod, duff_case(buf), ignore()); // ... } int main(int argc, char* argv[]) { using namespace boost; char buf1[7] = { 1, 1, 1, 1, 1, 1, 1 }; modern_duff<8>(buf1, 7); for(int i = 0; i < 8; ++i) std::cout << static_cast<int>(buf1[i]) << ", "; std::cout << '\n'; char buf2[7] = { 1, 1, 1, 1, 1, 1, 1 }; classic_duff(buf2, 7); for(int i = 0; i < 8; ++i) std::cout << static_cast<int>(buf2[i]) << ", "; std::cout << '\n'; } 0000000000400820 <classic_duff(char*, int)>: 400820: 8d 46 07 lea 0x7(%rsi),%eax 400823: 83 fe ff cmp $0xffffffffffffffff,%esi 400826: 0f 4f c6 cmovg %esi,%eax 400829: 83 e0 f8 and $0xfffffffffffffff8,%eax 40082c: 29 c6 sub %eax,%esi 40082e: 83 fe 07 cmp $0x7,%esi 400831: 77 24 ja 400857 <classic_duff(char*, int) +0x37> 400833: 89 f0 mov %esi,%eax 400835: ff 24 c5 50 0b 40 00 jmpq *0x400b50(,%rax,8) 40083c: c6 47 06 00 movb $0x0,0x6(%rdi) 400840: c6 47 05 00 movb $0x0,0x5(%rdi) 400844: c6 47 04 00 movb $0x0,0x4(%rdi) 400848: c6 47 03 00 movb $0x0,0x3(%rdi) 40084c: c6 47 02 00 movb $0x0,0x2(%rdi) 400850: c6 47 01 00 movb $0x0,0x1(%rdi) 400854: c6 07 00 movb $0x0,(%rdi) 400857: f3 c3 repz retq 400859: 90 nop 40085a: 66 data16 40085b: 66 data16 40085c: 90 nop 40085d: 66 data16 40085e: 66 data16 40085f: 90 nop 0000000000400a00 <void modern_duff<8>(char*, int)>: 400a00: 8d 46 07 lea 0x7(%rsi),%eax 400a03: 83 fe ff cmp $0xffffffffffffffff,%esi 400a06: 0f 4f c6 cmovg %esi,%eax 400a09: 83 e0 f8 and $0xfffffffffffffff8,%eax 400a0c: 29 c6 sub %eax,%esi 400a0e: 83 fe 07 cmp $0x7,%esi 400a11: 77 24 ja 400a37 <void modern_duff<8> (char*, int)+0x37> 400a13: 89 f0 mov %esi,%eax 400a15: ff 24 c5 90 0b 40 00 jmpq *0x400b90(,%rax,8) 400a1c: c6 47 06 00 movb $0x0,0x6(%rdi) 400a20: c6 47 05 00 movb $0x0,0x5(%rdi) 400a24: c6 47 04 00 movb $0x0,0x4(%rdi) 400a28: c6 47 03 00 movb $0x0,0x3(%rdi) 400a2c: c6 47 02 00 movb $0x0,0x2(%rdi) 400a30: c6 47 01 00 movb $0x0,0x1(%rdi) 400a34: c6 07 00 movb $0x0,(%rdi) 400a37: f3 c3 repz retq 400a39: c6 47 05 00 movb $0x0,0x5(%rdi) 400a3d: c6 47 04 00 movb $0x0,0x4(%rdi) 400a41: c6 47 03 00 movb $0x0,0x3(%rdi) 400a45: c6 47 02 00 movb $0x0,0x2(%rdi) 400a49: c6 47 01 00 movb $0x0,0x1(%rdi) 400a4d: c6 07 00 movb $0x0,(%rdi) 400a50: c3 retq 400a51: 90 nop 400a52: 90 nop 400a53: 90 nop 400a54: 90 nop 400a55: 90 nop 400a56: 90 nop 400a57: 90 nop 400a58: 90 nop 400a59: 90 nop 400a5a: 90 nop 400a5b: 90 nop 400a5c: 90 nop 400a5d: 90 nop 400a5e: 90 nop 400a5f: 90 nop