JSONWriter.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* vim: set ts=8 sts=2 et sw=2 tw=80: */
  3. /* This Source Code Form is subject to the terms of the Mozilla Public
  4. * License, v. 2.0. If a copy of the MPL was not distributed with this
  5. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6. /* A JSON pretty-printer class. */
  7. // A typical JSON-writing library requires you to first build up a data
  8. // structure that represents a JSON object and then serialize it (to file, or
  9. // somewhere else). This approach makes for a clean API, but building the data
  10. // structure takes up memory. Sometimes that isn't desirable, such as when the
  11. // JSON data is produced for memory reporting.
  12. //
  13. // The JSONWriter class instead allows JSON data to be written out
  14. // incrementally without building up large data structures.
  15. //
  16. // The API is slightly uglier than you would see in a typical JSON-writing
  17. // library, but still fairly easy to use. It's possible to generate invalid
  18. // JSON with JSONWriter, but typically the most basic testing will identify any
  19. // such problems.
  20. //
  21. // Similarly, there are no RAII facilities for automatically closing objects
  22. // and arrays. These would be nice if you are generating all your code within
  23. // nested functions, but in other cases you'd have to maintain an explicit
  24. // stack of RAII objects and manually unwind it, which is no better than just
  25. // calling "end" functions. Furthermore, the consequences of forgetting to
  26. // close an object or array are obvious and, again, will be identified via
  27. // basic testing, unlike other cases where RAII is typically used (e.g. smart
  28. // pointers) and the consequences of defects are more subtle.
  29. //
  30. // Importantly, the class does solve the two hard problems of JSON
  31. // pretty-printing, which are (a) correctly escaping strings, and (b) adding
  32. // appropriate indentation and commas between items.
  33. //
  34. // By default, every property is placed on its own line. However, it is
  35. // possible to request that objects and arrays be placed entirely on a single
  36. // line, which can reduce output size significantly in some cases.
  37. //
  38. // Strings used (for property names and string property values) are |const
  39. // char*| throughout, and can be ASCII or UTF-8.
  40. //
  41. // EXAMPLE
  42. // -------
  43. // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
  44. // following code:
  45. //
  46. // JSONWriter w(MakeUnique<MyWriteFunc>());
  47. // w.Start();
  48. // {
  49. // w.NullProperty("null");
  50. // w.BoolProperty("bool", true);
  51. // w.IntProperty("int", 1);
  52. // w.StartArrayProperty("array");
  53. // {
  54. // w.StringElement("string");
  55. // w.StartObjectElement();
  56. // {
  57. // w.DoubleProperty("double", 3.4);
  58. // w.StartArrayProperty("single-line array", w.SingleLineStyle);
  59. // {
  60. // w.IntElement(1);
  61. // w.StartObjectElement(); // SingleLineStyle is inherited from
  62. // w.EndObject(); // above for this collection
  63. // }
  64. // w.EndArray();
  65. // }
  66. // w.EndObject();
  67. // }
  68. // w.EndArray();
  69. // }
  70. // w.End();
  71. //
  72. // will produce pretty-printed output for the following JSON object:
  73. //
  74. // {
  75. // "null": null,
  76. // "bool": true,
  77. // "int": 1,
  78. // "array": [
  79. // "string",
  80. // {
  81. // "double": 3.4,
  82. // "single-line array": [1, {}]
  83. // }
  84. // ]
  85. // }
  86. //
  87. // The nesting in the example code is obviously optional, but can aid
  88. // readability.
  89. #ifndef mozilla_JSONWriter_h
  90. #define mozilla_JSONWriter_h
  91. #include "mozilla/double-conversion.h"
  92. #include "mozilla/IntegerPrintfMacros.h"
  93. #include "mozilla/PodOperations.h"
  94. #include "mozilla/Sprintf.h"
  95. #include "mozilla/UniquePtr.h"
  96. #include "mozilla/Vector.h"
  97. #include <stdio.h>
  98. namespace mozilla {
  // Output sink for JSONWriter: an abstract interface whose implementations
  // decide where the generated text goes. A true functor is deliberately
  // avoided, since it would force JSONWriter to become a template and that
  // templatization would leak into many places where it is unwanted.
  class JSONWriteFunc
  {
  public:
    // Virtual so that implementations can be destroyed through a pointer to
    // this base class.
    virtual ~JSONWriteFunc() = default;

    // Called with each successive piece of output. Every call JSONWriter makes
    // passes a NUL-terminated C string in |aStr|.
    virtual void Write(const char* aStr) = 0;
  };
  108. // Ideally this would be within |EscapedString| but when compiling with GCC
  109. // on Linux that caused link errors, whereas this formulation didn't.
  110. namespace detail {
  111. extern MFBT_DATA const char gTwoCharEscapes[256];
  112. } // namespace detail
  113. class JSONWriter
  114. {
  115. // From http://www.ietf.org/rfc/rfc4627.txt:
  116. //
  117. // "All Unicode characters may be placed within the quotation marks except
  118. // for the characters that must be escaped: quotation mark, reverse
  119. // solidus, and the control characters (U+0000 through U+001F)."
  120. //
  121. // This implementation uses two-char escape sequences where possible, namely:
  122. //
  123. // \", \\, \b, \f, \n, \r, \t
  124. //
  125. // All control characters not in the above list are represented with a
  126. // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v').
  127. //
  128. class EscapedString
  129. {
  130. // Only one of |mUnownedStr| and |mOwnedStr| are ever non-null. |mIsOwned|
  131. // indicates which one is in use. They're not within a union because that
  132. // wouldn't work with UniquePtr.
  133. bool mIsOwned;
  134. const char* mUnownedStr;
  135. UniquePtr<char[]> mOwnedStr;
  136. void SanityCheck() const
  137. {
  138. MOZ_ASSERT_IF( mIsOwned, mOwnedStr.get() && !mUnownedStr);
  139. MOZ_ASSERT_IF(!mIsOwned, !mOwnedStr.get() && mUnownedStr);
  140. }
  141. static char hexDigitToAsciiChar(uint8_t u)
  142. {
  143. u = u & 0xf;
  144. return u < 10 ? '0' + u : 'a' + (u - 10);
  145. }
  146. public:
  147. explicit EscapedString(const char* aStr)
  148. : mUnownedStr(nullptr)
  149. , mOwnedStr(nullptr)
  150. {
  151. const char* p;
  152. // First, see if we need to modify the string.
  153. size_t nExtra = 0;
  154. p = aStr;
  155. while (true) {
  156. uint8_t u = *p; // ensure it can't be interpreted as negative
  157. if (u == 0) {
  158. break;
  159. }
  160. if (detail::gTwoCharEscapes[u]) {
  161. nExtra += 1;
  162. } else if (u <= 0x1f) {
  163. nExtra += 5;
  164. }
  165. p++;
  166. }
  167. if (nExtra == 0) {
  168. // No escapes needed. Easy.
  169. mIsOwned = false;
  170. mUnownedStr = aStr;
  171. return;
  172. }
  173. // Escapes are needed. We'll create a new string.
  174. mIsOwned = true;
  175. size_t len = (p - aStr) + nExtra;
  176. mOwnedStr = MakeUnique<char[]>(len + 1);
  177. p = aStr;
  178. size_t i = 0;
  179. while (true) {
  180. uint8_t u = *p; // ensure it can't be interpreted as negative
  181. if (u == 0) {
  182. mOwnedStr[i] = 0;
  183. break;
  184. }
  185. if (detail::gTwoCharEscapes[u]) {
  186. mOwnedStr[i++] = '\\';
  187. mOwnedStr[i++] = detail::gTwoCharEscapes[u];
  188. } else if (u <= 0x1f) {
  189. mOwnedStr[i++] = '\\';
  190. mOwnedStr[i++] = 'u';
  191. mOwnedStr[i++] = '0';
  192. mOwnedStr[i++] = '0';
  193. mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4);
  194. mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f);
  195. } else {
  196. mOwnedStr[i++] = u;
  197. }
  198. p++;
  199. }
  200. }
  201. ~EscapedString()
  202. {
  203. SanityCheck();
  204. }
  205. const char* get() const
  206. {
  207. SanityCheck();
  208. return mIsOwned ? mOwnedStr.get() : mUnownedStr;
  209. }
  210. };
  211. public:
  212. // Collections (objects and arrays) are printed in a multi-line style by
  213. // default. This can be changed to a single-line style if SingleLineStyle is
  214. // specified. If a collection is printed in single-line style, every nested
  215. // collection within it is also printed in single-line style, even if
  216. // multi-line style is requested.
  217. enum CollectionStyle {
  218. MultiLineStyle, // the default
  219. SingleLineStyle
  220. };
  221. protected:
  222. const UniquePtr<JSONWriteFunc> mWriter;
  223. Vector<bool, 8> mNeedComma; // do we need a comma at depth N?
  224. Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N?
  225. size_t mDepth; // the current nesting depth
  226. void Indent()
  227. {
  228. for (size_t i = 0; i < mDepth; i++) {
  229. mWriter->Write(" ");
  230. }
  231. }
  232. // Adds whatever is necessary (maybe a comma, and then a newline and
  233. // whitespace) to separate an item (property or element) from what's come
  234. // before.
  235. void Separator()
  236. {
  237. if (mNeedComma[mDepth]) {
  238. mWriter->Write(",");
  239. }
  240. if (mDepth > 0 && mNeedNewlines[mDepth]) {
  241. mWriter->Write("\n");
  242. Indent();
  243. } else if (mNeedComma[mDepth]) {
  244. mWriter->Write(" ");
  245. }
  246. }
  247. void PropertyNameAndColon(const char* aName)
  248. {
  249. EscapedString escapedName(aName);
  250. mWriter->Write("\"");
  251. mWriter->Write(escapedName.get());
  252. mWriter->Write("\": ");
  253. }
  254. void Scalar(const char* aMaybePropertyName, const char* aStringValue)
  255. {
  256. Separator();
  257. if (aMaybePropertyName) {
  258. PropertyNameAndColon(aMaybePropertyName);
  259. }
  260. mWriter->Write(aStringValue);
  261. mNeedComma[mDepth] = true;
  262. }
  263. void QuotedScalar(const char* aMaybePropertyName, const char* aStringValue)
  264. {
  265. Separator();
  266. if (aMaybePropertyName) {
  267. PropertyNameAndColon(aMaybePropertyName);
  268. }
  269. mWriter->Write("\"");
  270. mWriter->Write(aStringValue);
  271. mWriter->Write("\"");
  272. mNeedComma[mDepth] = true;
  273. }
  274. void NewVectorEntries()
  275. {
  276. // If these tiny allocations OOM we might as well just crash because we
  277. // must be in serious memory trouble.
  278. MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1));
  279. MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1));
  280. mNeedComma[mDepth] = false;
  281. mNeedNewlines[mDepth] = true;
  282. }
  283. void StartCollection(const char* aMaybePropertyName, const char* aStartChar,
  284. CollectionStyle aStyle = MultiLineStyle)
  285. {
  286. Separator();
  287. if (aMaybePropertyName) {
  288. mWriter->Write("\"");
  289. mWriter->Write(aMaybePropertyName);
  290. mWriter->Write("\": ");
  291. }
  292. mWriter->Write(aStartChar);
  293. mNeedComma[mDepth] = true;
  294. mDepth++;
  295. NewVectorEntries();
  296. mNeedNewlines[mDepth] =
  297. mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle;
  298. }
  299. // Adds the whitespace and closing char necessary to end a collection.
  300. void EndCollection(const char* aEndChar)
  301. {
  302. if (mNeedNewlines[mDepth]) {
  303. mWriter->Write("\n");
  304. mDepth--;
  305. Indent();
  306. } else {
  307. mDepth--;
  308. }
  309. mWriter->Write(aEndChar);
  310. }
  311. public:
  312. explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter)
  313. : mWriter(Move(aWriter))
  314. , mNeedComma()
  315. , mNeedNewlines()
  316. , mDepth(0)
  317. {
  318. NewVectorEntries();
  319. }
  320. // Returns the JSONWriteFunc passed in at creation, for temporary use. The
  321. // JSONWriter object still owns the JSONWriteFunc.
  322. JSONWriteFunc* WriteFunc() const { return mWriter.get(); }
  323. // For all the following functions, the "Prints:" comment indicates what the
  324. // basic output looks like. However, it doesn't indicate the whitespace and
  325. // trailing commas, which are automatically added as required.
  326. //
  327. // All property names and string properties are escaped as necessary.
  328. // Prints: {
  329. void Start(CollectionStyle aStyle = MultiLineStyle)
  330. {
  331. StartCollection(nullptr, "{", aStyle);
  332. }
  333. // Prints: }
  334. void End() { EndCollection("}\n"); }
  335. // Prints: "<aName>": null
  336. void NullProperty(const char* aName)
  337. {
  338. Scalar(aName, "null");
  339. }
  340. // Prints: null
  341. void NullElement() { NullProperty(nullptr); }
  342. // Prints: "<aName>": <aBool>
  343. void BoolProperty(const char* aName, bool aBool)
  344. {
  345. Scalar(aName, aBool ? "true" : "false");
  346. }
  347. // Prints: <aBool>
  348. void BoolElement(bool aBool) { BoolProperty(nullptr, aBool); }
  349. // Prints: "<aName>": <aInt>
  350. void IntProperty(const char* aName, int64_t aInt)
  351. {
  352. char buf[64];
  353. SprintfLiteral(buf, "%" PRId64, aInt);
  354. Scalar(aName, buf);
  355. }
  356. // Prints: <aInt>
  357. void IntElement(int64_t aInt) { IntProperty(nullptr, aInt); }
  358. // Prints: "<aName>": <aDouble>
  359. void DoubleProperty(const char* aName, double aDouble)
  360. {
  361. static const size_t buflen = 64;
  362. char buf[buflen];
  363. const double_conversion::DoubleToStringConverter &converter =
  364. double_conversion::DoubleToStringConverter::EcmaScriptConverter();
  365. double_conversion::StringBuilder builder(buf, buflen);
  366. converter.ToShortest(aDouble, &builder);
  367. Scalar(aName, builder.Finalize());
  368. }
  369. // Prints: <aDouble>
  370. void DoubleElement(double aDouble) { DoubleProperty(nullptr, aDouble); }
  371. // Prints: "<aName>": "<aStr>"
  372. void StringProperty(const char* aName, const char* aStr)
  373. {
  374. EscapedString escapedStr(aStr);
  375. QuotedScalar(aName, escapedStr.get());
  376. }
  377. // Prints: "<aStr>"
  378. void StringElement(const char* aStr) { StringProperty(nullptr, aStr); }
  379. // Prints: "<aName>": [
  380. void StartArrayProperty(const char* aName,
  381. CollectionStyle aStyle = MultiLineStyle)
  382. {
  383. StartCollection(aName, "[", aStyle);
  384. }
  385. // Prints: [
  386. void StartArrayElement(CollectionStyle aStyle = MultiLineStyle)
  387. {
  388. StartArrayProperty(nullptr, aStyle);
  389. }
  390. // Prints: ]
  391. void EndArray() { EndCollection("]"); }
  392. // Prints: "<aName>": {
  393. void StartObjectProperty(const char* aName,
  394. CollectionStyle aStyle = MultiLineStyle)
  395. {
  396. StartCollection(aName, "{", aStyle);
  397. }
  398. // Prints: {
  399. void StartObjectElement(CollectionStyle aStyle = MultiLineStyle)
  400. {
  401. StartObjectProperty(nullptr, aStyle);
  402. }
  403. // Prints: }
  404. void EndObject() { EndCollection("}"); }
  405. };
  406. } // namespace mozilla
  407. #endif /* mozilla_JSONWriter_h */