
Michael Kochetkov wrote:
I have a 6-year-old Xeon 5130, and for your original code (with zeros):
The numbers I cited are for id1 random, id2 equal to id1.

boost::uuids::uuid id1 = boost::uuids::random_generator()(), id2 = id1;

The random id1 avoids the smartness to keep a zero in a register, and id2 being equal to it at the start leads to a predictable nonzero number of equalities (48830 out of the 100M total). This is what I get with 2010:

operator== 48830: 410
Eq16 48830: 299
memcmp 48830: 400
(I changed it to s += memcmp( id1.data, id2.data, 16 ) == 0; to match the others. The original gave something: 356. 2010 is smart enough here to avoid the trailing byte loop; it sees that the length is divisible by 16 and only does DWORD compares.)
my_memcmp 48830: 272

There you go. CPUs are odd beasts. I tried, just for the fun of it, the totally unoptimized by hand

template<class It1, class It2> inline bool my_equal( It1 first, It1 last, It2 first2 )
{
    for( ; first != last; ++first, ++first2 )
    {
        if( *first != *first2 ) return false;
    }

    return true;
}

and then

s += my_equal( id1.begin(), id1.end(), id2.begin() );

and what do you think?

48830: 274

Go figure.
2. My survey of the opinions of Boost people (I have made another inquiry in the "[boost] [function] The cost of boost::function" thread) shows that Boost is still not ready for production use.
Don't use it in production then. I do, and it works well for me. Already has for ten years or so. Appendix A, program: #include <boost/uuid/uuid.hpp> #include <boost/uuid/random_generator.hpp> #include <iostream> #include <windows.h> #include <mmsystem.h> #pragma comment( lib, "winmm.lib" ) typedef unsigned uint32_t; inline bool Eq16( unsigned char const * p, unsigned char const * q ) { return *reinterpret_cast<const uint32_t*>( p ) == *reinterpret_cast<const uint32_t*>( q ) && *reinterpret_cast<const uint32_t*>( p+4 ) == *reinterpret_cast<const uint32_t*>( q+4 ) && *reinterpret_cast<const uint32_t*>( p+8 ) == *reinterpret_cast<const uint32_t*>( q+8 ) && *reinterpret_cast<const uint32_t*>( p+12) == *reinterpret_cast<const uint32_t*>( q+12); } inline bool my_memcmp( unsigned char const * p, unsigned char const * q, size_t n ) { for( size_t i = 0; i < n; ++i ) { if( p[i] != q[i] ) return false; } return true; } template<class It1, class It2> inline bool my_equal( It1 first, It1 last, It2 first2 ) { for( ; first != last; ++first, ++first2 ) { if( *first != *first2 ) return false; } return true; } int main() { boost::uuids::uuid id1 = boost::uuids::random_generator()(), id2 = id1; int const N = 100000000; DWORD t1 = timeGetTime(); int s = 0; for( int i = 0; i < N; ++i ) { //s += ( id1 == id2 ); //s += Eq16( id1.data, id2.data ); //s += memcmp( id1.data, id2.data, 16 ) == 0; //s += my_memcmp( id1.data, id2.data, 16 ); s += my_equal( id1.begin(), id1.end(), id2.begin() ); id2.data[ i % 16 ] += i & 0xFF; } DWORD t2 = timeGetTime(); std::cout << s << ": " << t2 - t1 << std::endl; }