collator.hpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED
  7. #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED
  8. #include <boost/locale/config.hpp>
  9. #include <locale>
  10. #ifdef BOOST_MSVC
  11. # pragma warning(push)
  12. # pragma warning(disable : 4275 4251 4231 4660)
  13. #endif
  14. namespace boost { namespace locale {
  15. class info;
  16. /// \defgroup collation Collation
  17. ///
  18. /// This module introduces collation related classes
  19. /// @{
  /// Unicode collation level types
  ///
  /// Each level takes more properties of the text into account than the previous one.
  enum class collate_level {
      /// 1st collation level: base letters
      primary = 0,
      /// 2nd collation level: letters and accents
      secondary = 1,
      /// 3rd collation level: letters, accents and case
      tertiary = 2,
      /// 4th collation level: letters, accents, case and punctuation
      quaternary = 3,
      /// identical collation level: include code-point comparison
      identical = 4
  };
  28. class BOOST_DEPRECATED("Use collate_level") collator_base {
  29. public:
  30. using level_type = collate_level;
  31. static constexpr auto primary = collate_level::primary;
  32. static constexpr auto secondary = collate_level::secondary;
  33. static constexpr auto tertiary = collate_level::tertiary;
  34. static constexpr auto quaternary = collate_level::quaternary;
  35. static constexpr auto identical = collate_level::identical;
  36. };
  37. /// \brief Collation facet.
  38. ///
  39. /// It reimplements standard C++ std::collate,
  40. /// allowing usage of std::locale for direct string comparison
  41. template<typename CharType>
  42. class collator : public std::collate<CharType> {
  43. public:
  44. /// Type of the underlying character
  45. typedef CharType char_type;
  46. /// Type of string used with this facet
  47. typedef std::basic_string<CharType> string_type;
  48. /// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare
  49. ///
  50. /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
  51. /// they considered equal.
  52. int compare(collate_level level,
  53. const char_type* b1,
  54. const char_type* e1,
  55. const char_type* b2,
  56. const char_type* e2) const
  57. {
  58. return do_compare(level, b1, e1, b2, e2);
  59. }
  60. /// Create a binary string that can be compared to other in order to get collation order. The string is created
  61. /// for text in range [b,e). It is useful for collation of multiple strings for text.
  62. ///
  63. /// The transformation follows these rules:
  64. /// \code
  65. /// compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) );
  66. /// \endcode
  67. ///
  68. /// Calls do_transform
  69. string_type transform(collate_level level, const char_type* b, const char_type* e) const
  70. {
  71. return do_transform(level, b, e);
  72. }
  73. /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison.
  74. ///
  75. /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
  76. ///
  77. /// Calls do_hash
  78. long hash(collate_level level, const char_type* b, const char_type* e) const { return do_hash(level, b, e); }
  79. /// Compare two strings \a l and \a r using collation level \a level
  80. ///
  81. /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
  82. /// they considered equal.
  83. int compare(collate_level level, const string_type& l, const string_type& r) const
  84. {
  85. return do_compare(level, l.data(), l.data() + l.size(), r.data(), r.data() + r.size());
  86. }
  87. /// Calculate a hash that can be used for collation sensitive string comparison of a string \a s
  88. ///
  89. /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2)
  90. long hash(collate_level level, const string_type& s) const
  91. {
  92. return do_hash(level, s.data(), s.data() + s.size());
  93. }
  94. /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple
  95. /// strings.
  96. ///
  97. /// The transformation follows these rules:
  98. /// \code
  99. /// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
  100. /// \endcode
  101. string_type transform(collate_level level, const string_type& s) const
  102. {
  103. return do_transform(level, s.data(), s.data() + s.size());
  104. }
  105. protected:
  106. /// constructor of the collator object
  107. collator(size_t refs = 0) : std::collate<CharType>(refs) {}
  108. /// This function is used to override default collation function that does not take in account collation level.
  109. /// Uses primary level
  110. int
  111. do_compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const override
  112. {
  113. return do_compare(collate_level::identical, b1, e1, b2, e2);
  114. }
  115. /// This function is used to override default collation function that does not take in account collation level.
  116. /// Uses primary level
  117. string_type do_transform(const char_type* b, const char_type* e) const override
  118. {
  119. return do_transform(collate_level::identical, b, e);
  120. }
  121. /// This function is used to override default collation function that does not take in account collation level.
  122. /// Uses primary level
  123. long do_hash(const char_type* b, const char_type* e) const override
  124. {
  125. return do_hash(collate_level::identical, b, e);
  126. }
  127. /// Actual function that performs comparison between the strings. For details see compare member function. Can
  128. /// be overridden.
  129. virtual int do_compare(collate_level level,
  130. const char_type* b1,
  131. const char_type* e1,
  132. const char_type* b2,
  133. const char_type* e2) const = 0;
  134. /// Actual function that performs transformation. For details see transform member function. Can be overridden.
  135. virtual string_type do_transform(collate_level level, const char_type* b, const char_type* e) const = 0;
  136. /// Actual function that calculates hash. For details see hash member function. Can be overridden.
  137. virtual long do_hash(collate_level level, const char_type* b, const char_type* e) const = 0;
  138. };
  139. /// \brief This class can be used in STL algorithms and containers for comparison of strings
  140. /// with a level other than primary
  141. ///
  142. /// For example:
  143. ///
  144. /// \code
  145. /// std::map<std::string,std::string,comparator<char,collate_level::secondary> > data;
  146. /// \endcode
  147. ///
  148. /// Would create a map the keys of which are sorted using secondary collation level
  149. template<typename CharType, collate_level default_level = collate_level::identical>
  150. struct comparator {
  151. public:
  152. /// Create a comparator class for locale \a l and with collation leval \a level
  153. ///
  154. /// \note throws std::bad_cast if l does not have \ref collator facet installed
  155. comparator(const std::locale& l = std::locale(), collate_level level = default_level) :
  156. locale_(l), level_(level)
  157. {}
  158. /// Compare two strings -- equivalent to return left < right according to collation rules
  159. bool operator()(const std::basic_string<CharType>& left, const std::basic_string<CharType>& right) const
  160. {
  161. return std::use_facet<collator<CharType>>(locale_).compare(level_, left, right) < 0;
  162. }
  163. private:
  164. std::locale locale_;
  165. collate_level level_;
  166. };
  167. ///@}
  168. }} // namespace boost::locale
  169. #ifdef BOOST_MSVC
  170. # pragma warning(pop)
  171. #endif
  172. ///
  173. /// \example collate.cpp
  174. /// Example of using collation functions
  175. ///
  176. #endif