
Here is the C++ pseudo-code: namespace atomic { template<typename T> class var; template<typename T> class once; namespace mb { enum naked_e { naked }; enum fence_e { fence }; enum release_e { release }; enum depends_e { depends }; enum acquire_depends_e { acquire_depends }; } namespace externlib_api { class hashed_mutex; template<typename T> extern T load(T volatile*, mb::naked_e) const throw(); template<typename T> extern T load(T volatile*, mb::depends_e) const throw(); template<typename T> extern void store(T volatile*, T const&, mb::release_e) throw(); template<typename T> extern void store(T volatile*, T const&, mb::naked_e) throw(); template<typename T> extern bool cas(T volatile*, T const&, T const&, mb::acquire_depends_e) throw(); template<typename T> extern bool cas(T volatile*, T const&, T const&, mb::fence_e) throw(); } template<typename T> class var { mutable T volatile m_state; public: var() throw() {} var(T const &state) throw() : m_state(state) {} public: inline T load(mb::depends_e) const throw() { return externlib_api::load(&m_state, mb::depends); } inline T load(mb::naked_e) const throw() { return externlib_api::load(&m_state, mb::naked); } public: inline void store(T const &xchg, mb::fence_e) throw() { externlib_api::store(&m_state, xchg, mb::fence); } inline void store(T const &xchg, mb::naked_e) throw() { externlib_api::store(&m_state, xchg, mb::naked); } public: inline bool cas(T const &cmp, T const &xchg, mb::fence_e) throw() { return externlib_api::cas(&m_state, cmp, xchg, mb::fence); } inline bool cas(T const &cmp, T const &xchg, mb::acquire_depends_e) throw() { return externlib_api::cas(&m_state, cmp, xchg, mb::acquire_depends); } }; template<typename T> class once { var<T*> m_state; var<intword_t> m_count; public: once() throw() : m_state(0), m_count(1) {} ~once() throw() { if (try_dec()) { try { delete local; } catch(...) { assert(false); throw; } } } private: bool try_dec() throw() { intword_t local; do { local = m_count.load(mb::naked); if (! local || local < 1) { return false; } // use mb::fence to cover *both acquire and release // wrt the result of the decrement } while(! m_count.cas(local, local - 1, mb::fence)); return (local == 1); } bool try_inc() throw() { intword_t local; do { local = m_count.load(mb::naked); if (! local || local < 1) { return false; } } while(! m_count.cas(local, local + 1, mb::acquire_depends)); return true; } public: // atomic load / thread-saftey: strong T* load() const { T *local = m_state.load(mb::depends); if (! local) { externlib_api::hashed_mutex::guard_t const &lock(this); if (! try_inc()) { return 0; } local = m_state.load(mb::naked); if (! local) { // call try_dec on exceptions... local = new T; m_state.store(local, mb::release); } } else if(! try_inc()) { return 0; } return local; } // dec the refcount void dec() { if (try_dec()) { delete local; } } }; } okay, here is sample usage now: static atomic::once<foo> g_foo; void some_threads(...) { for(;;) { // whatever... foo *myfoo = g_foo.load(); if (myfoo) { // use myfoo... // okay, we are finished with myfoo g_foo.dec(); } } } This should be a workable solution to the static initialization problem... What do you all think?