
[...]
Your once<> class has a constructor, so it is initialized dynamically rather than statically; that leaves a potential race condition in calling the constructor itself.
Here is one without a ctor: template<typename T> class once { var<T*> m_state; var<intword_t> m_count; public: // once() throw() : m_state(0), m_count(0) {} // m_state and m_count must be init to NULL! ~once() throw() { if (try_dec()) { try { delete local; } catch(...) { assert(false); throw; } } } private: bool try_dec() throw() { intword_t local; do { local = m_count.load(mb::naked); if (local < 0) { return false; } // use mb::fence to cover *both acquire and release // wrt the result of the decrement } while(! m_count.cas(local, local - 1, mb::fence)); return (local == -1); } bool try_inc() throw() { intword_t local; do { local = m_count.load(mb::naked); if (local < 0) { return false; } } while(! m_count.cas(local, local + 1, mb::acquire_depends)); return true; } public: // atomic load / thread-saftey: strong T* load() const { T *local = m_state.load(mb::depends); if (! local) { externlib_api::hashed_mutex::guard_t const &lock(this); if (! try_inc()) { return 0; } local = m_state.load(mb::naked); if (! local) { // call try_dec on exceptions... local = new T; m_state.store(local, mb::release); } } else if(! try_inc()) { return 0; } return local; } // dec the refcount void dec() { if (try_dec()) { delete m_state.load(mb::depends); } } }; The destructor runs if m_state is not null and if the refcount drops to -1. I can't really see a problem here...