
I'm working on yet another Unicode string class/library from another set of features and requirements. * It is designed around the codepoint concept. * It uses (currently forward-) iterators for encoding and decoding. * It has a minimal interface, mostly constructors and iterator access. * Most other functions can (hopefully) be free functions. * It uses basic_string as backend. * It has fast access to underlying basic_string. * It is (currently) using some C++0X features (mainly decltype). * It is (currently) immutable and shares data, and thus fast to copy. Some of these features and requirements may be unacceptable to some of you, but I'm open to suggestions and comments. // Copyright (c) 2011 Anders Dalvander. // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) template <typename encoding> class basic_text { public: typedef encoding encoding_type; typedef typename encoding_type::codeunit_type codeunit_type; typedef typename encoding_type::codepoint_type codepoint_type; typedef std::basic_string<codeunit_type> string_type; typedef typename string_type::const_iterator codeunit_iterator; typedef typename encoding_type::decode_iterator<codeunit_iterator> codepoint_iterator; typedef codepoint_iterator const_iterator; typedef codepoint_iterator iterator; basic_text() : s(std::make_shared<string_type>()) { } template <typename other_encoding> basic_text(const basic_text<other_encoding>& text) : s(std::make_shared<string_type>( encoding_type::encode_iterator<decltype(std::begin(text))> (std::begin(text), std::begin(text), std::end(text)), encoding_type::encode_iterator<decltype(std::begin(text))> (std::end(text), std::begin(text), std::end(text)))) { } // TODO: Use some default_encoding traits type. template <typename container> explicit basic_text(const container& c) : s(std::make_shared<string_type>( encoding_type::encode_iterator<decltype(std::begin(c))> (std::begin(c), std::begin(c), std::end(c)), encoding_type::encode_iterator<decltype(std::begin(c))> (std::end(c), std::begin(c), std::end(c)))) { } template <typename codepoint_iterator> basic_text(codepoint_iterator first, codepoint_iterator last) : s(std::make_shared<string_type>( encoding_type::encode_iterator<codepoint_iterator> (first, first, last), encoding_type::encode_iterator<codepoint_iterator> (last, first, last))) { } codepoint_iterator begin() const { return codepoint_iterator (codeunit_begin(), codeunit_begin(), codeunit_end()); } codepoint_iterator end() const { return codepoint_iterator (codeunit_end(), codeunit_begin(), codeunit_end()); } codeunit_iterator codeunit_begin() const { return std::begin(*s); } codeunit_iterator codeunit_end() const { return std::end(*s); } const string_type& str() const { return *s; } const codeunit_type* c_str() const { return s->c_str(); } private: typedef std::shared_ptr<const string_type> pointer_type; pointer_type s; }; typedef undefined-type utf8_encoding; typedef basic_text<utf8_encoding> u8text; typedef undefined-type utf16_encoding; typedef basic_text<utf16_encoding> u16text; typedef undefined-type utf32_encoding; typedef basic_text<utf32_encoding> u32text; typedef undefined-type wchar_encoding; typedef basic_text<wchar_encoding> wtext; typedef undefined-type ascii_encoding; typedef basic_text<ascii_encoding> ascii_text; Usage: int main() { const uint32_t cps[] = {0x41,0x42,0x80,0x800,0x10000,0x10ffff}; // construct from codepoint range u8text u8txt(std::begin(cps), std::end(cps)); // construct from encoded container, // currently treats each element as a codepoint u8text u8txt2("test"); // sharing is caring u8text u8txt3 = u8txt; // construct from codepoint range u16text u16txt(std::begin(cps), std::end(cps)); // construct from text, transcodes range u16text u16txt2 = u8txt; // construct from text, transcodes range u32text u32txt = u8txt; // using policy (possible extension) ascii_text ascii(u8txt, replace_policy(0xff)); } void OpenFileWin32(const u8text& txt) { CloseHandle(CreateFileW(wtext(txt).c_str(), ...)) } typedef undefined-type posix_encoding; typedef basic_text<posix_encoding> posixtext; void OpenFilePosix(const u8text& txt) { close(open(posixtext(txt).c_str(), ...)) } Regards, Anders Dalvander -- WWFSMD?