
Hello, It just occurred to me that the thread safety problems with std::string are caused by the fact that the library is trying to decode the user intention from what methods he calls, instead of letting him declare it, and check at compile time that he is complying with the declared intention. I'm thinking to a solution in which the user has the choice of an immutable string, that behaves much like a ref counted "const char *" (or a String in java), and a mutable string as Java's StringBuilder. Something similar has been proposed also during the discussion on SuperString, and now it founds a further motivation for thread safety reasons. For performance reasons (that is involved here, otherwise a basic deep copy implementation would suffice to solve all problems), I think a third usage type can be added: temporary strings, that are used to mimic move semantics, until they are introduced into the language. Now some sketched code, to explain better what I mean (I leaved out a lot of details as templated charT, charTraits and allocator): namespace boost { namespace strings { namespace detail { class repr {}; } // the moveable, temporary string // invariant: rep has only one ref class temp_string { mutable intrusive_ptr<repr> rep; private: // used only by other two classes in release() friend class imm_string: friend class string_builder; temp_string(intrusive_ptr<repr> r):rep(r) {} public: temp_string(const temp_string& t):rep(t.take()) {} // move semantics temp_string(const string_builder&r):rep(r.rep->clone()) {} // always deep copy builders temp_string(const imm_string&r):rep(r.rep->clone()) {} // deep copy here // this is called when the ownership of the rep is going to be taken intrusive_ptr<repr> take() const { intrusive_ptr<repr> r=rep; rep.reset(); return r; } // all mutable operations that need to be chained are defined here temp_string& append(const temp_string&); temp_string& append(const imm_string&); temp_string& append(const string_builder&); }; // all free functions and free operators return temp_string template<typename S1,typename S2> temp_string operator +(const S1& a, const S2& b) { temp_string r(a), r.append(b); return r; } // the immutable string behaves as const char * // the content cannot be changed, but you can assign a new content to it. // release() can be used when we want cast from a string type to an other, // destroying the source (it is a move-semantic cast). // otherwise cast is implicit, but does deep copy class imm_string { friend temp_string; intrusive_ptr<repr> rep; public: imm_string(const temp_string& t):rep(t.take()) {} // move temp->imm // autogenerated copy constructor and operator= do shallow copy char operator[](unsigned i) { return rep->at(i); } temp_string release() { // see comment above the class declaration intrusive_ptr<repr> t=rep; rep.reset(); if(t.refs()==1) return temp_string(t); return temp_string(t->clone()); } }; // the string builder behaves as vector<char> // the content can be changed, but you can assign a new content to it // release() can be used when we want cast from a string type to an other, // destroying the source (it is a move-semantic cast). class string_builder { friend temp_string; intrusive_ptr<repr> rep; public: string_builder(const temp_string& t):rep(t.take()) {} // move temp-> builder string_builder(const string_builder&r):rep(r.rep->clone()) {} // always deep copy builders string_builder& operator=(const temp_string& t) { rep=t.take(); } string_builder& operator=(const string_builder& t) { if(rep->capacity()>t.size()) rep->copy(t.rep); else rep=t.rep->clone(); return *this; } // non chainable mutating operators are defined here char& operator[](unsigned i) { return rep->at(i); } temp_string release() { // see comment above the class declaration intrusive_ptr<repr> t=rep; rep.reset(); // we can assume as invariant that string_builder always owns the rep (refcount==1) return temp_string(t); } // mutating operators and methods are defined in terms of temp_string template<typename S2> string_builder& operator+=(const S2& r) { temp_string t(release()); this->operator=(t+r); } template<typename S2> string_builder&append(const S2& r) { temp_string t(release()); t.append(r); this->operator=(t); } }; }} This scheme addresses the performance problems noted in http://www.sgi.com/tech/stl/string_discussion.html for reference counted strings with unshareable state (like the g++ implementation), because now shareable/unshareable state is assigned by the user at compile time, and can be explicitly changed. In this way the user will know what to expect from the performance point of view, and the semantics will be (to my eyes) more clear. Corrado -- __________________________________________________________________________ dott. Corrado Zoccolo mailto: czoccolo (at) gmail.com PhD - Department of Computer Science - University of Pisa, Italy --------------------------------------------------------------------------