
On Mon, 17 Jan 2005 09:37:34 +0100, Matthias Troyer <troyer@itp.phys.ethz.ch> wrote:
I'm very interested if this scales well for a larger number of keys, of the order of 10^6 - 10^8 keys. I guess that for those sizes the scaling will already be exponential. Can you tell us what the maximum number of keys is that can be efficiently treated?
Matthias
Howdy, Perfect hashing and statistically reasonable hashing are quite different but practically the same ;-) For 2^128 keys you might like to try something these papers allude to: R. Uzgalis and M. Tong. Hashing myths. Technical Report 97, Department of Computer Science University of Auckland, July 1994. and John Hamer. Hashing Revisited. Proceedings of the 7th annual conference on Innovation and technology in computer science education Aarhus, Denmark SESSION: Algorithms Pages: 80 - 83 Year of Publication: 2002 ISBN:1-58113-499-1 I feel for R. Uzgalis though, as I think of these two papers as Hamer and Tong ;-) Hope this helps, matt. ______________________________ PS: A bit of bad code to show how it works is below, feel free to use it to start something decent if you like: Not sure if the code works or it is a w.i.p. as it is from an odd set of files I have on this machine. Looks about right though. /*------------------------------------------------------------------- created: 2004-4-16 15:58 filename: hash.hpp author: Matt Hurd purpose: ---------------------------------------------------------------------*/ #ifndef hash_hpp_2004416 #define hash_hpp_2004416 #include <boost/random.hpp> #include <vector> namespace net { static unsigned char hash_byte_transform_1[256] = { 129, 186, 126, 139, 50, 173, 232, 9, 144, 73, 174, 153, 203, 110, 206, 254, 150, 83, 108, 109, 249, 55, 231, 62, 115, 191, 101, 105, 252, 237, 49, 182, 64, 37, 13, 46, 152, 239, 42, 18, 247, 195, 86, 98, 102, 215, 227, 200, 65, 246, 80, 118, 250, 207, 214, 210, 149, 8, 241, 85, 243, 107, 199, 188, 202, 59, 162, 25, 103, 204, 43, 168, 221, 197, 53, 253, 234, 19, 20, 15, 242, 7, 56, 190, 233, 14, 79, 4, 163, 29, 170, 194, 184, 72, 240, 16, 45, 176, 31, 196, 238, 235, 140, 217, 93, 39, 143, 164, 40, 78, 99, 17, 212, 216, 166, 171, 167, 142, 224, 97, 169, 75, 48, 159, 180, 141, 209, 104, 28, 116, 44, 255, 47, 156, 77, 58, 91, 181, 245, 87, 0, 160, 133, 111, 26, 6, 230, 95, 70, 161, 137, 89, 68, 172, 244, 90, 1, 223, 81, 33, 67, 30, 132, 82, 236, 100, 138, 36, 183, 251, 165, 179, 60, 185, 66, 123, 61, 218, 125, 189, 3, 178, 92, 35, 34, 130, 69, 198, 177, 32, 63, 208, 21, 94, 193, 134, 71, 10, 154, 96, 128, 157, 54, 187, 120, 192, 145, 41, 11, 225, 57, 52, 131, 23, 84, 222, 74, 151, 127, 27, 5, 219, 228, 201, 113, 76, 38, 136, 2, 88, 146, 106, 22, 147, 213, 205, 114, 226, 229, 24, 175, 122, 124, 135, 121, 158, 220, 12, 211, 117, 112, 51, 148, 248, 119, 155 }; class hash { public: static unsigned char transform( unsigned char val) { return hash_byte_transform_1[val]; } template< typename T > static std::size_t transform( const T& val_a ) { union unholy { T val_; unsigned char byte_[sizeof(T)]; }; const unholy * t = reinterpret_cast<const unholy*>(&val_a); std::size_t result = 123456789; result ^= hash_byte_transform_1[t->byte_[0]]; for (int i = 1; i < sizeof(T); ++i) { result = (result << 8) | (result >> (8 * (sizeof(std::size_t) - 1))); result ^= hash_byte_transform_1[t->byte_[i]]; } return result; } template <> static std::size_t transform( const std::string& val ) { const int size = val.length(); std::size_t result = 123456789; if ( size > 0 ) { std::string::const_iterator i = val.begin(); std::string::const_iterator i_end = val.end(); result ^= hash_byte_transform_1[ unsigned char(*i) ]; for ( ;i != i_end; ++i) { result = (result << 8) | (result >> (8 * (sizeof(std::size_t) - 1))); result ^= hash_byte_transform_1[ unsigned char(*i) ]; } } return result; } static void prepare_random_list() { boost::mt19937 rng; rng.seed( 42 ); // the base to the meaning of life is chance boost::uniform_int<> byte_range(0,255); boost::variate_generator<boost::mt19937, boost::uniform_int<>
die(rng, byte_range);
std::vector<int> v; v.resize(256); // initialise for (std::size_t i = 0; i < 256; ++i) { v[i] = i; } // shuffle for (std::size_t i = 0; i < 256; ++i) { std::swap( v[i], v[255 - (die() % (256 - i))] ); } // output results for pasting for (std::size_t i = 0; i < 256; ++i) { std::cout << v[i] << ", "; } std::cout << "\n"; } }; } // namespace #endif