basic_parser.hpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_HPP
  11. #define BOOST_JSON_BASIC_PARSER_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/detail/except.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/kind.hpp>
  16. #include <boost/json/parse_options.hpp>
  17. #include <boost/json/detail/stack.hpp>
  18. #include <boost/json/detail/stream.hpp>
  19. #include <boost/json/detail/utf8.hpp>
  20. /* VFALCO NOTE
  21. This file is in the detail namespace because it
  22. is not allowed to be included directly by users,
  23. who should be including <boost/json/basic_parser_impl.hpp>
  24. instead, which provides the member function definitions.
  25. The source code is arranged this way to keep compile
  26. times down.
  27. */
  28. namespace boost {
  29. namespace json {
  30. /** An incremental SAX parser for serialized JSON.
  31. This implements a SAX-style parser, invoking a
  32. caller-supplied handler with each parsing event.
  33. To use, first declare a variable of type
  34. `basic_parser<T>` where `T` meets the handler
  35. requirements specified below. Then call
  36. @ref write_some one or more times with the input,
  37. setting `more = false` on the final buffer.
  38. The parsing events are realized through member
  39. function calls on the handler, which exists
  40. as a data member of the parser.
  41. \n
  42. The parser may dynamically allocate intermediate
  43. storage as needed to accommodate the nesting level
  44. of the input JSON. On subsequent invocations, the
  45. parser can cheaply re-use this memory, improving
  46. performance. This storage is freed when the
  47. parser is destroyed.
  48. @par Usage
  49. To get the declaration and function definitions
  50. for this class it is necessary to include this
  51. file instead:
  52. @code
  53. #include <boost/json/basic_parser_impl.hpp>
  54. @endcode
  55. Users who wish to parse JSON into the DOM container
  56. @ref value will not use this class directly; instead
  57. they will create an instance of @ref parser or
  58. @ref stream_parser and use that instead. Alternatively,
  59. they may call the function @ref parse. This class is
  60. designed for users who wish to perform custom actions
  61. instead of building a @ref value. For example, to
  62. produce a DOM from an external library.
  63. \n
  64. @note
  65. By default, only conforming JSON using UTF-8
  66. encoding is accepted. However, select non-compliant
  67. syntax can be allowed by construction using a
  68. @ref parse_options set to desired values.
  69. @par Handler
  70. The handler provided must be implemented as an
  71. object of class type which defines each of the
  72. required event member functions below. The event
  73. functions return a `bool` where `true` indicates
  74. success, and `false` indicates failure. If the
  75. member function returns `false`, it must set
  76. the error code to a suitable value. This error
  77. code will be returned by the write function to
  78. the caller.
  79. \n
  80. Handlers are required to declare the maximum
  81. limits on various elements. If these limits
  82. are exceeded during parsing, then parsing
  83. fails with an error.
  84. \n
  85. The following declaration meets the parser's
  86. handler requirements:
  87. @code
  88. struct handler
  89. {
  90. /// The maximum number of elements allowed in an array
  91. static constexpr std::size_t max_array_size = -1;
  92. /// The maximum number of elements allowed in an object
  93. static constexpr std::size_t max_object_size = -1;
  94. /// The maximum number of characters allowed in a string
  95. static constexpr std::size_t max_string_size = -1;
  96. /// The maximum number of characters allowed in a key
  97. static constexpr std::size_t max_key_size = -1;
  98. /// Called once when the JSON parsing begins.
  99. ///
  100. /// @return `true` on success.
  101. /// @param ec Set to the error, if any occurred.
  102. ///
  103. bool on_document_begin( error_code& ec );
  104. /// Called when the JSON parsing is done.
  105. ///
  106. /// @return `true` on success.
  107. /// @param ec Set to the error, if any occurred.
  108. ///
  109. bool on_document_end( error_code& ec );
  110. /// Called when the beginning of an array is encountered.
  111. ///
  112. /// @return `true` on success.
  113. /// @param ec Set to the error, if any occurred.
  114. ///
  115. bool on_array_begin( error_code& ec );
  116. /// Called when the end of the current array is encountered.
  117. ///
  118. /// @return `true` on success.
  119. /// @param n The number of elements in the array.
  120. /// @param ec Set to the error, if any occurred.
  121. ///
  122. bool on_array_end( std::size_t n, error_code& ec );
  123. /// Called when the beginning of an object is encountered.
  124. ///
  125. /// @return `true` on success.
  126. /// @param ec Set to the error, if any occurred.
  127. ///
  128. bool on_object_begin( error_code& ec );
  129. /// Called when the end of the current object is encountered.
  130. ///
  131. /// @return `true` on success.
  132. /// @param n The number of elements in the object.
  133. /// @param ec Set to the error, if any occurred.
  134. ///
  135. bool on_object_end( std::size_t n, error_code& ec );
  136. /// Called with characters corresponding to part of the current string.
  137. ///
  138. /// @return `true` on success.
  139. /// @param s The partial characters
  140. /// @param n The total size of the string thus far
  141. /// @param ec Set to the error, if any occurred.
  142. ///
  143. bool on_string_part( string_view s, std::size_t n, error_code& ec );
  144. /// Called with the last characters corresponding to the current string.
  145. ///
  146. /// @return `true` on success.
  147. /// @param s The remaining characters
  148. /// @param n The total size of the string
  149. /// @param ec Set to the error, if any occurred.
  150. ///
  151. bool on_string( string_view s, std::size_t n, error_code& ec );
  152. /// Called with characters corresponding to part of the current key.
  153. ///
  154. /// @return `true` on success.
  155. /// @param s The partial characters
  156. /// @param n The total size of the key thus far
  157. /// @param ec Set to the error, if any occurred.
  158. ///
  159. bool on_key_part( string_view s, std::size_t n, error_code& ec );
  160. /// Called with the last characters corresponding to the current key.
  161. ///
  162. /// @return `true` on success.
  163. /// @param s The remaining characters
  164. /// @param n The total size of the key
  165. /// @param ec Set to the error, if any occurred.
  166. ///
  167. bool on_key( string_view s, std::size_t n, error_code& ec );
  168. /// Called with the characters corresponding to part of the current number.
  169. ///
  170. /// @return `true` on success.
  171. /// @param s The partial characters
  172. /// @param ec Set to the error, if any occurred.
  173. ///
  174. bool on_number_part( string_view s, error_code& ec );
  175. /// Called when a signed integer is parsed.
  176. ///
  177. /// @return `true` on success.
  178. /// @param i The value
  179. /// @param s The remaining characters
  180. /// @param ec Set to the error, if any occurred.
  181. ///
  182. bool on_int64( int64_t i, string_view s, error_code& ec );
  183. /// Called when an unsigned integer is parsed.
  184. ///
  185. /// @return `true` on success.
  186. /// @param u The value
  187. /// @param s The remaining characters
  188. /// @param ec Set to the error, if any occurred.
  189. ///
  190. bool on_uint64( uint64_t u, string_view s, error_code& ec );
  191. /// Called when a double is parsed.
  192. ///
  193. /// @return `true` on success.
  194. /// @param d The value
  195. /// @param s The remaining characters
  196. /// @param ec Set to the error, if any occurred.
  197. ///
  198. bool on_double( double d, string_view s, error_code& ec );
  199. /// Called when a boolean is parsed.
  200. ///
  201. /// @return `true` on success.
  202. /// @param b The value
  204. /// @param ec Set to the error, if any occurred.
  205. ///
  206. bool on_bool( bool b, error_code& ec );
  207. /// Called when a null is parsed.
  208. ///
  209. /// @return `true` on success.
  210. /// @param ec Set to the error, if any occurred.
  211. ///
  212. bool on_null( error_code& ec );
  213. /// Called with characters corresponding to part of the current comment.
  214. ///
  215. /// @return `true` on success.
  216. /// @param s The partial characters.
  217. /// @param ec Set to the error, if any occurred.
  218. ///
  219. bool on_comment_part( string_view s, error_code& ec );
  220. /// Called with the last characters corresponding to the current comment.
  221. ///
  222. /// @return `true` on success.
  223. /// @param s The remaining characters
  224. /// @param ec Set to the error, if any occurred.
  225. ///
  226. bool on_comment( string_view s, error_code& ec );
  227. };
  228. @endcode
  229. @see
  230. @ref parse,
  231. @ref stream_parser,
  232. [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
  233. @headerfile <boost/json/basic_parser.hpp>
  234. */
  235. template<class Handler>
  236. class basic_parser
  237. {
  238. enum class state : char
  239. {
  240. doc1, doc2, doc3, doc4,
  241. com1, com2, com3, com4,
  242. nul1, nul2, nul3,
  243. tru1, tru2, tru3,
  244. fal1, fal2, fal3, fal4,
  245. str1, str2, str3, str4,
  246. str5, str6, str7, str8,
  247. sur1, sur2, sur3,
  248. sur4, sur5, sur6,
  249. obj1, obj2, obj3, obj4,
  250. obj5, obj6, obj7, obj8,
  251. obj9, obj10, obj11,
  252. arr1, arr2, arr3,
  253. arr4, arr5, arr6,
  254. num1, num2, num3, num4,
  255. num5, num6, num7, num8,
  256. exp1, exp2, exp3,
  257. val1, val2
  258. };
  259. struct number
  260. {
  261. uint64_t mant;
  262. int bias;
  263. int exp;
  264. bool frac;
  265. bool neg;
  266. };
  267. // optimization: must come first
  268. Handler h_;
  269. number num_;
  270. error_code ec_;
  271. detail::stack st_;
  272. detail::utf8_sequence seq_;
  273. unsigned u1_;
  274. unsigned u2_;
  275. bool more_; // false for final buffer
  276. bool done_ = false; // true on complete parse
  277. bool clean_ = true; // write_some exited cleanly
  278. const char* end_;
  279. parse_options opt_;
  280. // how many levels deeper the parser can go
  281. std::size_t depth_ = opt_.max_depth;
  282. inline void reserve();
  283. inline const char* sentinel();
  284. inline bool incomplete(
  285. const detail::const_stream_wrapper& cs);
  286. #ifdef __INTEL_COMPILER
  287. #pragma warning push
  288. #pragma warning disable 2196
  289. #endif
  290. BOOST_NOINLINE
  291. inline
  292. const char*
  293. suspend_or_fail(state st);
  294. BOOST_NOINLINE
  295. inline
  296. const char*
  297. suspend_or_fail(
  298. state st,
  299. std::size_t n);
  300. BOOST_NOINLINE
  301. inline
  302. const char*
  303. fail(const char* p) noexcept;
  304. BOOST_NOINLINE
  305. inline
  306. const char*
  307. fail(
  308. const char* p,
  309. error ev,
  310. source_location const* loc) noexcept;
  311. BOOST_NOINLINE
  312. inline
  313. const char*
  314. maybe_suspend(
  315. const char* p,
  316. state st);
  317. BOOST_NOINLINE
  318. inline
  319. const char*
  320. maybe_suspend(
  321. const char* p,
  322. state st,
  323. std::size_t n);
  324. BOOST_NOINLINE
  325. inline
  326. const char*
  327. maybe_suspend(
  328. const char* p,
  329. state st,
  330. const number& num);
  331. BOOST_NOINLINE
  332. inline
  333. const char*
  334. suspend(
  335. const char* p,
  336. state st);
  337. BOOST_NOINLINE
  338. inline
  339. const char*
  340. suspend(
  341. const char* p,
  342. state st,
  343. const number& num);
  344. #ifdef __INTEL_COMPILER
  345. #pragma warning pop
  346. #endif
  347. template<bool StackEmpty_/*, bool Terminal_*/>
  348. const char* parse_comment(const char* p,
  349. std::integral_constant<bool, StackEmpty_> stack_empty,
  350. /*std::integral_constant<bool, Terminal_>*/ bool terminal);
  351. template<bool StackEmpty_>
  352. const char* parse_document(const char* p,
  353. std::integral_constant<bool, StackEmpty_> stack_empty);
  354. template<bool StackEmpty_, bool AllowComments_/*,
  355. bool AllowTrailing_, bool AllowBadUTF8_*/>
  356. const char* parse_value(const char* p,
  357. std::integral_constant<bool, StackEmpty_> stack_empty,
  358. std::integral_constant<bool, AllowComments_> allow_comments,
  359. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  360. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  361. template<bool StackEmpty_, bool AllowComments_/*,
  362. bool AllowTrailing_, bool AllowBadUTF8_*/>
  363. const char* resume_value(const char* p,
  364. std::integral_constant<bool, StackEmpty_> stack_empty,
  365. std::integral_constant<bool, AllowComments_> allow_comments,
  366. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  367. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  368. template<bool StackEmpty_, bool AllowComments_/*,
  369. bool AllowTrailing_, bool AllowBadUTF8_*/>
  370. const char* parse_object(const char* p,
  371. std::integral_constant<bool, StackEmpty_> stack_empty,
  372. std::integral_constant<bool, AllowComments_> allow_comments,
  373. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  374. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  375. template<bool StackEmpty_, bool AllowComments_/*,
  376. bool AllowTrailing_, bool AllowBadUTF8_*/>
  377. const char* parse_array(const char* p,
  378. std::integral_constant<bool, StackEmpty_> stack_empty,
  379. std::integral_constant<bool, AllowComments_> allow_comments,
  380. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  381. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  382. template<bool StackEmpty_>
  383. const char* parse_null(const char* p,
  384. std::integral_constant<bool, StackEmpty_> stack_empty);
  385. template<bool StackEmpty_>
  386. const char* parse_true(const char* p,
  387. std::integral_constant<bool, StackEmpty_> stack_empty);
  388. template<bool StackEmpty_>
  389. const char* parse_false(const char* p,
  390. std::integral_constant<bool, StackEmpty_> stack_empty);
  391. template<bool StackEmpty_, bool IsKey_/*,
  392. bool AllowBadUTF8_*/>
  393. const char* parse_string(const char* p,
  394. std::integral_constant<bool, StackEmpty_> stack_empty,
  395. std::integral_constant<bool, IsKey_> is_key,
  396. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  397. template<bool StackEmpty_, char First_>
  398. const char* parse_number(const char* p,
  399. std::integral_constant<bool, StackEmpty_> stack_empty,
  400. std::integral_constant<char, First_> first);
  401. template<bool StackEmpty_, bool IsKey_/*,
  402. bool AllowBadUTF8_*/>
  403. const char* parse_unescaped(const char* p,
  404. std::integral_constant<bool, StackEmpty_> stack_empty,
  405. std::integral_constant<bool, IsKey_> is_key,
  406. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  407. template<bool StackEmpty_/*, bool IsKey_,
  408. bool AllowBadUTF8_*/>
  409. const char* parse_escaped(
  410. const char* p,
  411. std::size_t total,
  412. std::integral_constant<bool, StackEmpty_> stack_empty,
  413. /*std::integral_constant<bool, IsKey_>*/ bool is_key,
  414. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  415. // intentionally private
  416. std::size_t
  417. depth() const noexcept
  418. {
  419. return opt_.max_depth - depth_;
  420. }
  421. public:
  422. /// Copy constructor (deleted)
  423. basic_parser(
  424. basic_parser const&) = delete;
  425. /// Copy assignment (deleted)
  426. basic_parser& operator=(
  427. basic_parser const&) = delete;
  428. /** Destructor.
  429. All dynamically allocated internal memory is freed.
  430. @par Effects
  431. @code
  432. this->handler().~Handler()
  433. @endcode
  434. @par Complexity
  435. Same as `~Handler()`.
  436. @par Exception Safety
  437. Same as `~Handler()`.
  438. */
  439. ~basic_parser() = default;
  440. /** Constructor.
  441. This function constructs the parser with
  442. the specified options, with any additional
  443. arguments forwarded to the handler's constructor.
  444. @par Complexity
  445. Same as `Handler( std::forward< Args >( args )... )`.
  446. @par Exception Safety
  447. Same as `Handler( std::forward< Args >( args )... )`.
  448. @param opt Configuration settings for the parser.
  449. If this structure is default constructed, the
  450. parser will accept only standard JSON.
  451. @param args Optional additional arguments
  452. forwarded to the handler's constructor.
  453. @see parse_options
  454. */
  455. template<class... Args>
  456. explicit
  457. basic_parser(
  458. parse_options const& opt,
  459. Args&&... args);
  460. /** Return a reference to the handler.
  461. This function provides access to the constructed
  462. instance of the handler owned by the parser.
  463. @par Complexity
  464. Constant.
  465. @par Exception Safety
  466. No-throw guarantee.
  467. */
  468. Handler&
  469. handler() noexcept
  470. {
  471. return h_;
  472. }
  473. /** Return a reference to the handler.
  474. This function provides access to the constructed
  475. instance of the handler owned by the parser.
  476. @par Complexity
  477. Constant.
  478. @par Exception Safety
  479. No-throw guarantee.
  480. */
  481. Handler const&
  482. handler() const noexcept
  483. {
  484. return h_;
  485. }
  486. /** Return the last error.
  487. This returns the last error code which
  488. was generated in the most recent call
  489. to @ref write_some.
  490. @par Complexity
  491. Constant.
  492. @par Exception Safety
  493. No-throw guarantee.
  494. */
  495. error_code
  496. last_error() const noexcept
  497. {
  498. return ec_;
  499. }
  500. /** Return true if a complete JSON has been parsed.
  501. This function returns `true` when all of these
  502. conditions are met:
  503. @li A complete serialized JSON has been
  504. presented to the parser, and
  505. @li No error or exception has occurred since the
  506. parser was constructed, or since the last call
  507. to @ref reset,
  508. @par Complexity
  509. Constant.
  510. @par Exception Safety
  511. No-throw guarantee.
  512. */
  513. bool
  514. done() const noexcept
  515. {
  516. return done_;
  517. }
  518. /** Reset the state, to parse a new document.
  519. This function discards the current parsing
  520. state, to prepare for parsing a new document.
  521. Dynamically allocated temporary memory used
  522. by the implementation is not deallocated.
  523. @par Complexity
  524. Constant.
  525. @par Exception Safety
  526. No-throw guarantee.
  527. */
  528. void
  529. reset() noexcept;
  530. /** Indicate a parsing failure.
  531. This changes the state of the parser to indicate
  532. that the parse has failed. A parser implementation
  533. can use this to fail the parser if needed due to
  534. external inputs.
  535. @note
  536. If `!ec`, the stored error code is unspecified.
  537. @par Complexity
  538. Constant.
  539. @par Exception Safety
  540. No-throw guarantee.
  541. @param ec The error code to set. If the code does
  542. not indicate failure, an implementation-defined
  543. error code that indicates failure will be stored
  544. instead.
  545. */
  546. void
  547. fail(error_code ec) noexcept;
  548. /** Parse some of an input string as JSON, incrementally.
  549. This function parses the JSON in the specified
  550. buffer, calling the handler to emit each SAX
  551. parsing event. The parse proceeds from the
  552. current state, which is at the beginning of a
  553. new JSON or in the middle of the current JSON
  554. if any characters were already parsed.
  555. \n
  556. The characters in the buffer are processed
  557. starting from the beginning, until one of the
  558. following conditions is met:
  559. @li All of the characters in the buffer
  560. have been parsed, or
  561. @li Some of the characters in the buffer
  562. have been parsed and the JSON is complete, or
  563. @li A parsing error occurs.
  564. The supplied buffer does not need to contain the
  565. entire JSON. Subsequent calls can provide more
  566. serialized data, allowing JSON to be processed
  567. incrementally. The end of the serialized JSON
  568. can be indicated by passing `more = false`.
  569. @par Complexity
  570. Linear in `size`.
  571. @par Exception Safety
  572. Basic guarantee.
  573. Calls to the handler may throw.
  574. Upon error or exception, subsequent calls will
  575. fail until @ref reset is called to parse a new JSON.
  576. @return The number of characters successfully
  577. parsed, which may be smaller than `size`.
  578. @param more `true` if there are possibly more
  579. buffers in the current JSON, otherwise `false`.
  580. @param data A pointer to a buffer of `size`
  581. characters to parse.
  582. @param size The number of characters pointed to
  583. by `data`.
  584. @param ec Set to the error, if any occurred.
  585. */
  586. /** @{ */
  587. std::size_t
  588. write_some(
  589. bool more,
  590. char const* data,
  591. std::size_t size,
  592. error_code& ec);
  593. std::size_t
  594. write_some(
  595. bool more,
  596. char const* data,
  597. std::size_t size,
  598. std::error_code& ec);
  599. /** @} */
  600. };
  601. } // namespace json
  602. } // namespace boost
  603. #endif