utf8_codecvt.hpp 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. //
  2. // Copyright (c) 2015 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
  7. #define BOOST_LOCALE_UTF8_CODECVT_HPP
  8. #include <boost/locale/generic_codecvt.hpp>
  9. #include <boost/locale/utf.hpp>
  10. #include <boost/cstdint.hpp>
  11. #include <locale>
  12. namespace boost { namespace locale {
  13. /// \brief Geneneric utf8 codecvt facet, it allows to convert UTF-8 strings to UTF-16 and UTF-32 using wchar_t,
  14. /// char32_t and char16_t
  15. template<typename CharType>
  16. class utf8_codecvt : public generic_codecvt<CharType, utf8_codecvt<CharType>> {
  17. public:
  18. struct state_type {};
  19. utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType, utf8_codecvt<CharType>>(refs) {}
  20. static int max_encoding_length() { return 4; }
  21. static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
  22. {
  23. return state_type();
  24. }
  25. static utf::code_point to_unicode(state_type&, const char*& begin, const char* end)
  26. {
  27. const char* p = begin;
  28. utf::code_point c = utf::utf_traits<char>::decode(p, end);
  29. if(c != utf::illegal && c != utf::incomplete)
  30. begin = p;
  31. return c;
  32. }
  33. static utf::code_point from_unicode(state_type&, utf::code_point u, char* begin, const char* end)
  34. {
  35. if(!utf::is_valid_codepoint(u))
  36. return utf::illegal;
  37. int width;
  38. if((width = utf::utf_traits<char>::width(u)) > end - begin)
  39. return utf::incomplete;
  40. utf::utf_traits<char>::encode(u, begin);
  41. return width;
  42. }
  43. };
  44. }} // namespace boost::locale
  45. #endif