pattern.ipp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. //
  2. // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/url
  8. //
  9. #ifndef BOOST_URL_DETAIL_IMPL_PATTERN_IPP
  10. #define BOOST_URL_DETAIL_IMPL_PATTERN_IPP
  11. #include <boost/url/detail/pattern.hpp>
  12. #include <boost/url/detail/pct_format.hpp>
  13. #include <boost/url/detail/replacement_field_rule.hpp>
  14. #include <boost/url/rfc/detail/host_rule.hpp>
  15. #include <boost/url/rfc/detail/path_rules.hpp>
  16. #include <boost/url/rfc/detail/port_rule.hpp>
  17. #include <boost/url/rfc/detail/scheme_rule.hpp>
  18. namespace boost {
  19. namespace urls {
  20. namespace detail {
  21. static constexpr auto lhost_chars = host_chars + ':';
  22. void
  23. pattern::
  24. apply(
  25. url_base& u,
  26. format_args const& args) const
  27. {
  28. // measure total
  29. struct sizes
  30. {
  31. std::size_t scheme = 0;
  32. std::size_t user = 0;
  33. std::size_t pass = 0;
  34. std::size_t host = 0;
  35. std::size_t port = 0;
  36. std::size_t path = 0;
  37. std::size_t query = 0;
  38. std::size_t frag = 0;
  39. };
  40. sizes n;
  41. format_parse_context pctx(nullptr, nullptr, 0);
  42. measure_context mctx(args);
  43. if (!scheme.empty())
  44. {
  45. pctx = {scheme, pctx.next_arg_id()};
  46. n.scheme = pct_vmeasure(
  47. grammar::alpha_chars, pctx, mctx);
  48. mctx.advance_to(0);
  49. }
  50. if (has_authority)
  51. {
  52. if (has_user)
  53. {
  54. pctx = {user, pctx.next_arg_id()};
  55. n.user = pct_vmeasure(
  56. user_chars, pctx, mctx);
  57. mctx.advance_to(0);
  58. if (has_pass)
  59. {
  60. pctx = {pass, pctx.next_arg_id()};
  61. n.pass = pct_vmeasure(
  62. password_chars, pctx, mctx);
  63. mctx.advance_to(0);
  64. }
  65. }
  66. if (host.starts_with('['))
  67. {
  68. BOOST_ASSERT(host.ends_with(']'));
  69. pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()};
  70. n.host = pct_vmeasure(
  71. lhost_chars, pctx, mctx) + 2;
  72. mctx.advance_to(0);
  73. }
  74. else
  75. {
  76. pctx = {host, pctx.next_arg_id()};
  77. n.host = pct_vmeasure(
  78. host_chars, pctx, mctx);
  79. mctx.advance_to(0);
  80. }
  81. if (has_port)
  82. {
  83. pctx = {port, pctx.next_arg_id()};
  84. n.port = pct_vmeasure(
  85. grammar::digit_chars, pctx, mctx);
  86. mctx.advance_to(0);
  87. }
  88. }
  89. if (!path.empty())
  90. {
  91. pctx = {path, pctx.next_arg_id()};
  92. n.path = pct_vmeasure(
  93. path_chars, pctx, mctx);
  94. mctx.advance_to(0);
  95. }
  96. if (has_query)
  97. {
  98. pctx = {query, pctx.next_arg_id()};
  99. n.query = pct_vmeasure(
  100. query_chars, pctx, mctx);
  101. mctx.advance_to(0);
  102. }
  103. if (has_frag)
  104. {
  105. pctx = {frag, pctx.next_arg_id()};
  106. n.frag = pct_vmeasure(
  107. fragment_chars, pctx, mctx);
  108. mctx.advance_to(0);
  109. }
  110. std::size_t const n_total =
  111. n.scheme +
  112. (n.scheme != 0) * 1 + // ":"
  113. has_authority * 2 + // "//"
  114. n.user +
  115. has_pass * 1 + // ":"
  116. n.pass +
  117. has_user * 1 + // "@"
  118. n.host +
  119. has_port * 1 + // ":"
  120. n.port +
  121. n.path +
  122. has_query * 1 + // "?"
  123. n.query +
  124. has_frag * 1 + // "#"
  125. n.frag;
  126. u.reserve(n_total);
  127. // Apply
  128. pctx = {nullptr, nullptr, 0};
  129. format_context fctx(nullptr, args);
  130. url_base::op_t op(u);
  131. using parts = parts_base;
  132. if (!scheme.empty())
  133. {
  134. auto dest = u.resize_impl(
  135. parts::id_scheme,
  136. n.scheme + 1, op);
  137. pctx = {scheme, pctx.next_arg_id()};
  138. fctx.advance_to(dest);
  139. const char* dest1 = pct_vformat(
  140. grammar::alpha_chars, pctx, fctx);
  141. dest[n.scheme] = ':';
  142. // validate
  143. if (!grammar::parse({dest, dest1}, scheme_rule()))
  144. {
  145. throw_invalid_argument();
  146. }
  147. }
  148. if (has_authority)
  149. {
  150. if (has_user)
  151. {
  152. auto dest = u.set_user_impl(
  153. n.user, op);
  154. pctx = {user, pctx.next_arg_id()};
  155. fctx.advance_to(dest);
  156. char const* dest1 = pct_vformat(
  157. user_chars, pctx, fctx);
  158. u.impl_.decoded_[parts::id_user] =
  159. pct_string_view(dest, dest1 - dest)
  160. ->decoded_size();
  161. if (has_pass)
  162. {
  163. char* destp = u.set_password_impl(
  164. n.pass, op);
  165. pctx = {pass, pctx.next_arg_id()};
  166. fctx.advance_to(destp);
  167. dest1 = pct_vformat(
  168. password_chars, pctx, fctx);
  169. u.impl_.decoded_[parts::id_pass] =
  170. pct_string_view({destp, dest1})
  171. ->decoded_size() + 1;
  172. }
  173. }
  174. auto dest = u.set_host_impl(
  175. n.host, op);
  176. if (host.starts_with('['))
  177. {
  178. BOOST_ASSERT(host.ends_with(']'));
  179. pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()};
  180. *dest++ = '[';
  181. fctx.advance_to(dest);
  182. char* dest1 =
  183. pct_vformat(lhost_chars, pctx, fctx);
  184. *dest1++ = ']';
  185. u.impl_.decoded_[parts::id_host] =
  186. pct_string_view(dest - 1, dest1 - dest)
  187. ->decoded_size();
  188. }
  189. else
  190. {
  191. pctx = {host, pctx.next_arg_id()};
  192. fctx.advance_to(dest);
  193. char const* dest1 =
  194. pct_vformat(host_chars, pctx, fctx);
  195. u.impl_.decoded_[parts::id_host] =
  196. pct_string_view(dest, dest1 - dest)
  197. ->decoded_size();
  198. }
  199. auto uh = u.encoded_host();
  200. auto h = grammar::parse(uh, host_rule).value();
  201. std::memcpy(
  202. u.impl_.ip_addr_,
  203. h.addr,
  204. sizeof(u.impl_.ip_addr_));
  205. u.impl_.host_type_ = h.host_type;
  206. if (has_port)
  207. {
  208. dest = u.set_port_impl(n.port, op);
  209. pctx = {port, pctx.next_arg_id()};
  210. fctx.advance_to(dest);
  211. char const* dest1 = pct_vformat(
  212. grammar::digit_chars, pctx, fctx);
  213. u.impl_.decoded_[parts::id_port] =
  214. pct_string_view(dest, dest1 - dest)
  215. ->decoded_size() + 1;
  216. string_view up = {dest - 1, dest1};
  217. auto p = grammar::parse(up, detail::port_part_rule).value();
  218. if (p.has_port)
  219. u.impl_.port_number_ = p.port_number;
  220. }
  221. }
  222. if (!path.empty())
  223. {
  224. auto dest = u.resize_impl(
  225. parts::id_path,
  226. n.path, op);
  227. pctx = {path, pctx.next_arg_id()};
  228. fctx.advance_to(dest);
  229. auto dest1 = pct_vformat(
  230. path_chars, pctx, fctx);
  231. pct_string_view npath(dest, dest1 - dest);
  232. u.impl_.decoded_[parts::id_path] +=
  233. npath.decoded_size();
  234. if (!npath.empty())
  235. {
  236. u.impl_.nseg_ = std::count(
  237. npath.begin() + 1,
  238. npath.end(), '/') + 1;
  239. }
  240. // handle edge cases
  241. // 1) path is first component and the
  242. // first segment contains an unencoded ':'
  243. // This is impossible because the template
  244. // "{}" would be a host.
  245. if (u.scheme().empty() &&
  246. !u.has_authority())
  247. {
  248. auto fseg = u.encoded_segments().front();
  249. std::size_t nc = std::count(
  250. fseg.begin(), fseg.end(), ':');
  251. if (nc)
  252. {
  253. std::size_t diff = nc * 2;
  254. u.reserve(n_total + diff);
  255. dest = u.resize_impl(
  256. parts::id_path,
  257. n.path + diff, op);
  258. char* dest0 = dest + diff;
  259. std::memmove(dest0, dest, n.path);
  260. while (dest0 != dest)
  261. {
  262. if (*dest0 != ':')
  263. {
  264. *dest++ = *dest0++;
  265. }
  266. else
  267. {
  268. *dest++ = '%';
  269. *dest++ = '3';
  270. *dest++ = 'A';
  271. dest0++;
  272. }
  273. }
  274. }
  275. }
  276. // 2) url has no authority and path
  277. // starts with "//"
  278. if (!u.has_authority() &&
  279. u.encoded_path().starts_with("//"))
  280. {
  281. u.reserve(n_total + 2);
  282. dest = u.resize_impl(
  283. parts::id_path,
  284. n.path + 2, op);
  285. std::memmove(dest + 2, dest, n.path);
  286. *dest++ = '/';
  287. *dest = '.';
  288. }
  289. }
  290. if (has_query)
  291. {
  292. auto dest = u.resize_impl(
  293. parts::id_query,
  294. n.query + 1, op);
  295. *dest++ = '?';
  296. pctx = {query, pctx.next_arg_id()};
  297. fctx.advance_to(dest);
  298. auto dest1 = pct_vformat(
  299. query_chars, pctx, fctx);
  300. pct_string_view nquery(dest, dest1 - dest);
  301. u.impl_.decoded_[parts::id_query] +=
  302. nquery.decoded_size() + 1;
  303. if (!nquery.empty())
  304. {
  305. u.impl_.nparam_ = std::count(
  306. nquery.begin(),
  307. nquery.end(), '&') + 1;
  308. }
  309. }
  310. if (has_frag)
  311. {
  312. auto dest = u.resize_impl(
  313. parts::id_frag,
  314. n.frag + 1, op);
  315. *dest++ = '#';
  316. pctx = {frag, pctx.next_arg_id()};
  317. fctx.advance_to(dest);
  318. auto dest1 = pct_vformat(
  319. fragment_chars, pctx, fctx);
  320. u.impl_.decoded_[parts::id_frag] +=
  321. make_pct_string_view(
  322. string_view(dest, dest1 - dest))
  323. ->decoded_size() + 1;
  324. }
  325. }
  326. // This rule represents a pct-encoded string
  327. // that contains an arbitrary number of
  328. // replacement ids in it
  329. template<class CharSet>
  330. struct pct_encoded_fmt_string_rule_t
  331. {
  332. using value_type = pct_string_view;
  333. constexpr
  334. pct_encoded_fmt_string_rule_t(
  335. CharSet const& cs) noexcept
  336. : cs_(cs)
  337. {
  338. }
  339. template<class CharSet_>
  340. friend
  341. constexpr
  342. auto
  343. pct_encoded_fmt_string_rule(
  344. CharSet_ const& cs) noexcept ->
  345. pct_encoded_fmt_string_rule_t<CharSet_>;
  346. result<value_type>
  347. parse(
  348. char const*& it,
  349. char const* end) const noexcept
  350. {
  351. auto const start = it;
  352. if(it == end)
  353. {
  354. // this might be empty
  355. return {};
  356. }
  357. // consume some with literal rule
  358. // this might be an empty literal
  359. auto literal_rule = pct_encoded_rule(cs_);
  360. auto rv = literal_rule.parse(it, end);
  361. while (rv)
  362. {
  363. auto it0 = it;
  364. // consume some with replacement id
  365. // rule
  366. if (!replacement_field_rule.parse(it, end))
  367. {
  368. it = it0;
  369. break;
  370. }
  371. rv = literal_rule.parse(it, end);
  372. }
  373. return string_view(start, it - start);
  374. }
  375. private:
  376. CharSet cs_;
  377. };
  378. template<class CharSet>
  379. constexpr
  380. auto
  381. pct_encoded_fmt_string_rule(
  382. CharSet const& cs) noexcept ->
  383. pct_encoded_fmt_string_rule_t<CharSet>
  384. {
  385. // If an error occurs here it means that
  386. // the value of your type does not meet
  387. // the requirements. Please check the
  388. // documentation!
  389. static_assert(
  390. grammar::is_charset<CharSet>::value,
  391. "CharSet requirements not met");
  392. return pct_encoded_fmt_string_rule_t<CharSet>(cs);
  393. }
  394. // This rule represents a regular string with
  395. // only chars from the specified charset and
  396. // an arbitrary number of replacement ids in it
  397. template<class CharSet>
  398. struct fmt_token_rule_t
  399. {
  400. using value_type = pct_string_view;
  401. constexpr
  402. fmt_token_rule_t(
  403. CharSet const& cs) noexcept
  404. : cs_(cs)
  405. {
  406. }
  407. template<class CharSet_>
  408. friend
  409. constexpr
  410. auto
  411. fmt_token_rule(
  412. CharSet_ const& cs) noexcept ->
  413. fmt_token_rule_t<CharSet_>;
  414. result<value_type>
  415. parse(
  416. char const*& it,
  417. char const* end) const noexcept
  418. {
  419. auto const start = it;
  420. BOOST_ASSERT(it != end);
  421. /*
  422. // This should never happen because
  423. // all tokens are optional and will
  424. // already return `none`:
  425. if(it == end)
  426. {
  427. BOOST_URL_RETURN_EC(
  428. grammar::error::need_more);
  429. }
  430. */
  431. // consume some with literal rule
  432. // this might be an empty literal
  433. auto partial_token_rule =
  434. grammar::optional_rule(
  435. grammar::token_rule(cs_));
  436. auto rv = partial_token_rule.parse(it, end);
  437. while (rv)
  438. {
  439. auto it0 = it;
  440. // consume some with replacement id
  441. if (!replacement_field_rule.parse(it, end))
  442. {
  443. // no replacement and no more cs
  444. // before: nothing else to consume
  445. it = it0;
  446. break;
  447. }
  448. // after {...}, consume any more chars
  449. // in the charset
  450. rv = partial_token_rule.parse(it, end);
  451. }
  452. if(it == start)
  453. {
  454. // it != end but we consumed nothing
  455. BOOST_URL_RETURN_EC(
  456. grammar::error::need_more);
  457. }
  458. return string_view(start, it - start);
  459. }
  460. private:
  461. CharSet cs_;
  462. };
  463. template<class CharSet>
  464. constexpr
  465. auto
  466. fmt_token_rule(
  467. CharSet const& cs) noexcept ->
  468. fmt_token_rule_t<CharSet>
  469. {
  470. // If an error occurs here it means that
  471. // the value of your type does not meet
  472. // the requirements. Please check the
  473. // documentation!
  474. static_assert(
  475. grammar::is_charset<CharSet>::value,
  476. "CharSet requirements not met");
  477. return fmt_token_rule_t<CharSet>(cs);
  478. }
  479. struct userinfo_template_rule_t
  480. {
  481. struct value_type
  482. {
  483. string_view user;
  484. string_view password;
  485. bool has_password = false;
  486. };
  487. auto
  488. parse(
  489. char const*& it,
  490. char const* end
  491. ) const noexcept ->
  492. result<value_type>
  493. {
  494. static constexpr auto uchars =
  495. unreserved_chars +
  496. sub_delim_chars;
  497. static constexpr auto pwchars =
  498. uchars + ':';
  499. value_type t;
  500. // user
  501. static constexpr auto user_fmt_rule =
  502. pct_encoded_fmt_string_rule(uchars);
  503. auto rv = grammar::parse(
  504. it, end, user_fmt_rule);
  505. BOOST_ASSERT(rv);
  506. t.user = *rv;
  507. // ':'
  508. if( it == end ||
  509. *it != ':')
  510. {
  511. t.has_password = false;
  512. t.password = {};
  513. return t;
  514. }
  515. ++it;
  516. // pass
  517. static constexpr auto pass_fmt_rule =
  518. pct_encoded_fmt_string_rule(grammar::ref(pwchars));
  519. rv = grammar::parse(
  520. it, end, pass_fmt_rule);
  521. BOOST_ASSERT(rv);
  522. t.has_password = true;
  523. t.password = *rv;
  524. return t;
  525. }
  526. };
  527. constexpr userinfo_template_rule_t userinfo_template_rule{};
  528. struct host_template_rule_t
  529. {
  530. using value_type = string_view;
  531. auto
  532. parse(
  533. char const*& it,
  534. char const* end
  535. ) const noexcept ->
  536. result<value_type>
  537. {
  538. if(it == end)
  539. {
  540. // empty host
  541. return {};
  542. }
  543. // the host type will be ultimately
  544. // validated when applying the replacement
  545. // strings. Any chars allowed in hosts
  546. // are allowed here.
  547. if (*it != '[')
  548. {
  549. // IPv4address and reg-name have the
  550. // same char sets.
  551. constexpr auto any_host_template_rule =
  552. pct_encoded_fmt_string_rule(host_chars);
  553. auto rv = grammar::parse(
  554. it, end, any_host_template_rule);
  555. // any_host_template_rule can always
  556. // be empty, so it's never invalid
  557. BOOST_ASSERT(rv);
  558. return detail::to_sv(*rv);
  559. }
  560. // IP-literals need to be enclosed in
  561. // "[]" if using ':' in the template
  562. // string, because the ':' would be
  563. // ambiguous with the port in fmt string.
  564. // The "[]:" can be used in replacement
  565. // strings without the "[]" though.
  566. constexpr auto ip_literal_template_rule =
  567. pct_encoded_fmt_string_rule(lhost_chars);
  568. auto it0 = it;
  569. auto rv = grammar::parse(
  570. it, end,
  571. grammar::optional_rule(
  572. grammar::tuple_rule(
  573. grammar::squelch(
  574. grammar::delim_rule('[')),
  575. ip_literal_template_rule,
  576. grammar::squelch(
  577. grammar::delim_rule(']')))));
  578. // ip_literal_template_rule can always
  579. // be empty, so it's never invalid, but
  580. // the rule might fail to match the
  581. // closing "]"
  582. BOOST_ASSERT(rv);
  583. return string_view{it0, it};
  584. }
  585. };
  586. constexpr host_template_rule_t host_template_rule{};
  587. struct authority_template_rule_t
  588. {
  589. using value_type = pattern;
  590. result<value_type>
  591. parse(
  592. char const*& it,
  593. char const* end
  594. ) const noexcept
  595. {
  596. pattern u;
  597. // [ userinfo "@" ]
  598. {
  599. auto rv = grammar::parse(
  600. it, end,
  601. grammar::optional_rule(
  602. grammar::tuple_rule(
  603. userinfo_template_rule,
  604. grammar::squelch(
  605. grammar::delim_rule('@')))));
  606. BOOST_ASSERT(rv);
  607. if(rv->has_value())
  608. {
  609. auto& r = **rv;
  610. u.has_user = true;
  611. u.user = r.user;
  612. u.has_pass = r.has_password;
  613. u.pass = r.password;
  614. }
  615. }
  616. // host
  617. {
  618. auto rv = grammar::parse(
  619. it, end,
  620. host_template_rule);
  621. // host is allowed to be empty
  622. BOOST_ASSERT(rv);
  623. u.host = *rv;
  624. }
  625. // [ ":" port ]
  626. {
  627. constexpr auto port_template_rule =
  628. grammar::optional_rule(
  629. fmt_token_rule(grammar::digit_chars));
  630. auto it0 = it;
  631. auto rv = grammar::parse(
  632. it, end,
  633. grammar::tuple_rule(
  634. grammar::squelch(
  635. grammar::delim_rule(':')),
  636. port_template_rule));
  637. if (!rv)
  638. {
  639. it = it0;
  640. }
  641. else
  642. {
  643. u.has_port = true;
  644. if (rv->has_value())
  645. {
  646. u.port = **rv;
  647. }
  648. }
  649. }
  650. return u;
  651. }
  652. };
  653. constexpr authority_template_rule_t authority_template_rule{};
  654. struct scheme_template_rule_t
  655. {
  656. using value_type = string_view;
  657. result<value_type>
  658. parse(
  659. char const*& it,
  660. char const* end) const noexcept
  661. {
  662. auto const start = it;
  663. if(it == end)
  664. {
  665. // scheme can't be empty
  666. BOOST_URL_RETURN_EC(
  667. grammar::error::mismatch);
  668. }
  669. if(!grammar::alpha_chars(*it) &&
  670. *it != '{')
  671. {
  672. // expected alpha
  673. BOOST_URL_RETURN_EC(
  674. grammar::error::mismatch);
  675. }
  676. // it starts with replacement id or alpha char
  677. if (!grammar::alpha_chars(*it))
  678. {
  679. if (!replacement_field_rule.parse(it, end))
  680. {
  681. // replacement_field_rule is invalid
  682. BOOST_URL_RETURN_EC(
  683. grammar::error::mismatch);
  684. }
  685. }
  686. else
  687. {
  688. // skip first
  689. ++it;
  690. }
  691. static
  692. constexpr
  693. grammar::lut_chars scheme_chars(
  694. "0123456789" "+-."
  695. "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  696. "abcdefghijklmnopqrstuvwxyz");
  697. // non-scheme chars might be a new
  698. // replacement-id or just an invalid char
  699. it = grammar::find_if_not(
  700. it, end, scheme_chars);
  701. while (it != end)
  702. {
  703. auto it0 = it;
  704. if (!replacement_field_rule.parse(it, end))
  705. {
  706. it = it0;
  707. break;
  708. }
  709. it = grammar::find_if_not(
  710. it, end, scheme_chars);
  711. }
  712. return string_view(start, it - start);
  713. }
  714. };
  715. constexpr scheme_template_rule_t scheme_template_rule{};
  716. // This rule should consider all url types at the
  717. // same time according to the format string
  718. // - relative urls with no scheme/authority
  719. // - absolute urls have no fragment
  720. struct pattern_rule_t
  721. {
  722. using value_type = pattern;
  723. result<value_type>
  724. parse(
  725. char const*& it,
  726. char const* const end
  727. ) const noexcept
  728. {
  729. pattern u;
  730. // optional scheme
  731. {
  732. auto it0 = it;
  733. auto rv = grammar::parse(
  734. it, end,
  735. grammar::tuple_rule(
  736. scheme_template_rule,
  737. grammar::squelch(
  738. grammar::delim_rule(':'))));
  739. if(rv)
  740. u.scheme = *rv;
  741. else
  742. it = it0;
  743. }
  744. // hier_part (authority + path)
  745. // if there are less than 2 chars left,
  746. // we are parsing the path
  747. if (it == end)
  748. {
  749. // this is over, so we can consider
  750. // that a "path-empty"
  751. return u;
  752. }
  753. if(end - it == 1)
  754. {
  755. // only one char left
  756. // it can be a single separator "/",
  757. // representing an empty absolute path,
  758. // or a single-char segment
  759. if(*it == '/')
  760. {
  761. // path-absolute
  762. u.path = {it, 1};
  763. ++it;
  764. return u;
  765. }
  766. // this can be a:
  767. // - path-noscheme if there's no scheme, or
  768. // - path-rootless with a single char, or
  769. // - path-empty (and consume nothing)
  770. if (!u.scheme.empty() ||
  771. *it != ':')
  772. {
  773. // path-rootless with a single char
  774. // this needs to be a segment because
  775. // the authority needs two slashes
  776. // "//"
  777. // path-noscheme also matches here
  778. // because we already validated the
  779. // first char
  780. auto rv = grammar::parse(
  781. it, end, urls::detail::segment_rule);
  782. if(! rv)
  783. return rv.error();
  784. u.path = *rv;
  785. }
  786. return u;
  787. }
  788. // authority
  789. if( it[0] == '/' &&
  790. it[1] == '/')
  791. {
  792. // "//" always indicates authority
  793. it += 2;
  794. auto rv = grammar::parse(
  795. it, end,
  796. authority_template_rule);
  797. // authority is allowed to be empty
  798. BOOST_ASSERT(rv);
  799. u.has_authority = true;
  800. u.has_user = rv->has_user;
  801. u.user = rv->user;
  802. u.has_pass = rv->has_pass;
  803. u.pass = rv->pass;
  804. u.host = rv->host;
  805. u.has_port = rv->has_port;
  806. u.port = rv->port;
  807. }
  808. // the authority requires an absolute path
  809. // or an empty path
  810. if (it == end ||
  811. (u.has_authority &&
  812. (*it != '/' &&
  813. *it != '?' &&
  814. *it != '#')))
  815. {
  816. // path-empty
  817. return u;
  818. }
  819. // path-abempty
  820. // consume the whole path at once because
  821. // we're going to count number of segments
  822. // later after the replacements happen
  823. static constexpr auto segment_fmt_rule =
  824. pct_encoded_fmt_string_rule(path_chars);
  825. auto rp = grammar::parse(
  826. it, end, segment_fmt_rule);
  827. // path-abempty is allowed to be empty
  828. BOOST_ASSERT(rp);
  829. u.path = *rp;
  830. // [ "?" query ]
  831. {
  832. static constexpr auto query_fmt_rule =
  833. pct_encoded_fmt_string_rule(query_chars);
  834. auto rv = grammar::parse(
  835. it, end,
  836. grammar::tuple_rule(
  837. grammar::squelch(
  838. grammar::delim_rule('?')),
  839. query_fmt_rule));
  840. // query is allowed to be empty but
  841. // delim rule is not
  842. if (rv)
  843. {
  844. u.has_query = true;
  845. u.query = *rv;
  846. }
  847. }
  848. // [ "#" fragment ]
  849. {
  850. static constexpr auto frag_fmt_rule =
  851. pct_encoded_fmt_string_rule(fragment_chars);
  852. auto rv = grammar::parse(
  853. it, end,
  854. grammar::tuple_rule(
  855. grammar::squelch(
  856. grammar::delim_rule('#')),
  857. frag_fmt_rule));
  858. // frag is allowed to be empty but
  859. // delim rule is not
  860. if (rv)
  861. {
  862. u.has_frag = true;
  863. u.frag = *rv;
  864. }
  865. }
  866. return u;
  867. }
  868. };
  869. constexpr pattern_rule_t pattern_rule{};
  870. result<pattern>
  871. parse_pattern(
  872. string_view s)
  873. {
  874. return grammar::parse(
  875. s, pattern_rule);
  876. }
  877. } // detail
  878. } // urls
  879. } // boost
  880. #endif