
Interestingly, if I try the following program, the "stupid" my_memcmp produces the fastest result. #include <boost/uuid/uuid.hpp> #include <iostream> #include <windows.h> #include <mmsystem.h> typedef unsigned uint32_t; inline bool Eq16( unsigned char const * p, unsigned char const * q ) { return *reinterpret_cast<const uint32_t*>( p ) == *reinterpret_cast<const uint32_t*>( q ) && *reinterpret_cast<const uint32_t*>( p+4 ) == *reinterpret_cast<const uint32_t*>( q+4 ) && *reinterpret_cast<const uint32_t*>( p+8 ) == *reinterpret_cast<const uint32_t*>( q+8 ) && *reinterpret_cast<const uint32_t*>( p+12) == *reinterpret_cast<const uint32_t*>( q+12); } inline bool my_memcmp( unsigned char const * p, unsigned char const * q, size_t n ) { for( size_t i = 0; i < n; ++i ) { if( p[i] != q[i] ) return false; } return true; } int main() { boost::uuids::uuid id1 = {}, id2 = {}; int const N = 100000000; DWORD t1 = timeGetTime(); int s = 0; for( int i = 0; i < N; ++i ) { //s += ( id1 == id2 ); //s += Eq16( id1.data, id2.data ); //s += memcmp( id1.data, id2.data, 16 ); s += my_memcmp( id1.data, id2.data, 16 ); id2.data[ i % 16 ] += i & 0xFF; } DWORD t2 = timeGetTime(); std::cout << s << ": " << t2 - t1 << std::endl; }