conversion.hpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  7. #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  8. #include <boost/locale/util/string.hpp>
  9. #include <locale>
  10. #ifdef BOOST_MSVC
  11. # pragma warning(push)
  12. # pragma warning(disable : 4275 4251 4231 4660)
  13. #endif
  14. namespace boost { namespace locale {
  15. /// \defgroup convert Text Conversions
  16. ///
  17. /// This module provides various functions for string manipulation like Unicode normalization, case conversion etc.
  18. /// @{
  /// \brief Common base of the converter facet; it only carries the flags that select a text operation.
  class converter_base {
  public:
      /// Selects which text operation the converter facet is asked to perform
      enum conversion_type {
          normalization = 0, ///< Apply Unicode normalization on the text
          upper_case = 1,    ///< Convert text to upper case
          lower_case = 2,    ///< Convert text to lower case
          case_folding = 3,  ///< Fold case in the text
          title_case = 4     ///< Convert text to title case
      };
  };
  /// Forward declaration of the converter facet template; it is declared here
  /// without a definition, and the definitions follow as per-character-type declarations.
  template<class CharType>
  class converter;
  33. #ifdef BOOST_LOCALE_DOXYGEN
  34. ///
  35. /// \brief The facet that implements text manipulation
  36. ///
  37. /// It is used to performs text conversion operations defined by \ref converter_base::conversion_type.
  38. /// It is specialized for four types of characters \c char, \c wchar_t, \c char16_t, \c char32_t
  39. template<typename Char>
  40. class BOOST_LOCALE_DECL converter : public converter_base, public std::locale::facet {
  41. public:
  42. /// Locale identification
  43. static std::locale::id id;
  44. /// Standard constructor
  45. converter(size_t refs = 0) : std::locale::facet(refs) {}
  46. /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter
  47. /// \a flags is used for specification of normalization method like nfd, nfc etc.
  48. virtual std::basic_string<Char>
  49. convert(conversion_type how, const Char* begin, const Char* end, int flags = 0) const = 0;
  50. };
  51. #else
  52. template<>
  53. class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet {
  54. public:
  55. static std::locale::id id;
  56. converter(size_t refs = 0) : std::locale::facet(refs) {}
  57. ~converter();
  58. virtual std::string convert(conversion_type how, const char* begin, const char* end, int flags = 0) const = 0;
  59. };
  60. template<>
  61. class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet {
  62. public:
  63. static std::locale::id id;
  64. converter(size_t refs = 0) : std::locale::facet(refs) {}
  65. ~converter();
  66. virtual std::wstring
  67. convert(conversion_type how, const wchar_t* begin, const wchar_t* end, int flags = 0) const = 0;
  68. };
  69. # ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  70. template<>
  71. class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet {
  72. public:
  73. static std::locale::id id;
  74. converter(size_t refs = 0) : std::locale::facet(refs) {}
  75. ~converter();
  76. virtual std::u16string
  77. convert(conversion_type how, const char16_t* begin, const char16_t* end, int flags = 0) const = 0;
  78. };
  79. # endif
  80. # ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  81. template<>
  82. class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet {
  83. public:
  84. static std::locale::id id;
  85. converter(size_t refs = 0) : std::locale::facet(refs) {}
  86. ~converter();
  87. virtual std::u32string
  88. convert(conversion_type how, const char32_t* begin, const char32_t* end, int flags = 0) const = 0;
  89. };
  90. # endif
  91. #endif
  /// The type that defines the <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
  enum norm_type {
      norm_nfd = 0,  ///< Canonical decomposition
      norm_nfc = 1,  ///< Canonical decomposition followed by canonical composition
      norm_nfkd = 2, ///< Compatibility decomposition
      norm_nfkc = 3, ///< Compatibility decomposition followed by canonical composition.
      /// Default normalization - canonical decomposition followed by canonical composition
      norm_default = norm_nfc,
  };
  100. /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n
  101. ///
  102. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  103. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  104. /// of a Unicode character set.
  105. ///
  106. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  107. template<typename CharType>
  108. std::basic_string<CharType> normalize(const CharType* begin,
  109. const CharType* end,
  110. norm_type n = norm_default,
  111. const std::locale& loc = std::locale())
  112. {
  113. return std::use_facet<converter<CharType>>(loc).convert(converter_base::normalization, begin, end, n);
  114. }
  115. /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n
  116. ///
  117. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  118. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  119. /// of a Unicode character set.
  120. ///
  121. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  122. template<typename CharType>
  123. std::basic_string<CharType> normalize(const std::basic_string<CharType>& str,
  124. norm_type n = norm_default,
  125. const std::locale& loc = std::locale())
  126. {
  127. return normalize(str.data(), str.data() + str.size(), n, loc);
  128. }
  129. /// Normalize NULL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n
  130. ///
  131. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  132. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  133. /// of a Unicode character set.
  134. ///
  135. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  136. template<typename CharType>
  137. std::basic_string<CharType>
  138. normalize(const CharType* str, norm_type n = norm_default, const std::locale& loc = std::locale())
  139. {
  140. return normalize(str, util::str_end(str), n, loc);
  141. }
  142. ///////////////////////////////////////////////////
  143. /// Convert a string in range [begin,end) to upper case according to locale \a loc
  144. ///
  145. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  146. template<typename CharType>
  147. std::basic_string<CharType>
  148. to_upper(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  149. {
  150. return std::use_facet<converter<CharType>>(loc).convert(converter_base::upper_case, begin, end);
  151. }
  /// Upper-case the string \a str using the rules of locale \a loc
  ///
  /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  template<typename CharType>
  std::basic_string<CharType> to_upper(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  {
      // Delegate to the [begin,end) overload over the string's character buffer.
      const CharType* const first = str.data();
      const CharType* const last = first + str.size();
      return to_upper(first, last, loc);
  }
  160. /// Convert a NULL terminated string \a str to upper case according to locale \a loc
  161. ///
  162. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  163. template<typename CharType>
  164. std::basic_string<CharType> to_upper(const CharType* str, const std::locale& loc = std::locale())
  165. {
  166. return to_upper(str, util::str_end(str), loc);
  167. }
  168. ///////////////////////////////////////////////////
  169. /// Convert a string in range [begin,end) to lower case according to locale \a loc
  170. ///
  171. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  172. template<typename CharType>
  173. std::basic_string<CharType>
  174. to_lower(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  175. {
  176. return std::use_facet<converter<CharType>>(loc).convert(converter_base::lower_case, begin, end);
  177. }
  /// Lower-case the string \a str using the rules of locale \a loc
  ///
  /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  template<typename CharType>
  std::basic_string<CharType> to_lower(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  {
      // Delegate to the [begin,end) overload over the string's character buffer.
      const CharType* const first = str.data();
      const CharType* const last = first + str.size();
      return to_lower(first, last, loc);
  }
  186. /// Convert a NULL terminated string \a str to lower case according to locale \a loc
  187. ///
  188. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  189. template<typename CharType>
  190. std::basic_string<CharType> to_lower(const CharType* str, const std::locale& loc = std::locale())
  191. {
  192. return to_lower(str, util::str_end(str), loc);
  193. }
  194. ///////////////////////////////////////////////////
  195. /// Convert a string in range [begin,end) to title case according to locale \a loc
  196. ///
  197. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  198. template<typename CharType>
  199. std::basic_string<CharType>
  200. to_title(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  201. {
  202. return std::use_facet<converter<CharType>>(loc).convert(converter_base::title_case, begin, end);
  203. }
  /// Title-case the string \a str using the rules of locale \a loc
  ///
  /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  template<typename CharType>
  std::basic_string<CharType> to_title(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  {
      // Delegate to the [begin,end) overload over the string's character buffer.
      const CharType* const first = str.data();
      const CharType* const last = first + str.size();
      return to_title(first, last, loc);
  }
  212. /// Convert a NULL terminated string \a str to title case according to locale \a loc
  213. ///
  214. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  215. template<typename CharType>
  216. std::basic_string<CharType> to_title(const CharType* str, const std::locale& loc = std::locale())
  217. {
  218. return to_title(str, util::str_end(str), loc);
  219. }
  220. ///////////////////////////////////////////////////
  221. /// Fold case of a string in range [begin,end) according to locale \a loc
  222. ///
  223. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  224. template<typename CharType>
  225. std::basic_string<CharType>
  226. fold_case(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  227. {
  228. return std::use_facet<converter<CharType>>(loc).convert(converter_base::case_folding, begin, end);
  229. }
  /// Apply case folding to the string \a str using the rules of locale \a loc
  ///
  /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  template<typename CharType>
  std::basic_string<CharType> fold_case(const std::basic_string<CharType>& str,
                                        const std::locale& loc = std::locale())
  {
      // Delegate to the [begin,end) overload over the string's character buffer.
      const CharType* const first = str.data();
      const CharType* const last = first + str.size();
      return fold_case(first, last, loc);
  }
  239. /// Fold case of a NULL terminated string \a str according to locale \a loc
  240. ///
  241. /// \note throws std::bad_cast if loc does not have \ref converter facet installed
  242. template<typename CharType>
  243. std::basic_string<CharType> fold_case(const CharType* str, const std::locale& loc = std::locale())
  244. {
  245. return fold_case(str, util::str_end(str), loc);
  246. }
  247. ///@}
  248. }} // namespace boost::locale
  249. #ifdef BOOST_MSVC
  250. # pragma warning(pop)
  251. #endif
  252. /// \example conversions.cpp
  253. ///
  254. /// Example of using various text conversion functions.
  255. ///
  256. /// \example wconversions.cpp
  257. ///
  258. /// Example of using various text conversion functions with wide strings.
  259. #endif