rippled
Loading...
Searching...
No Matches
json_reader.cpp
1#include <xrpl/basics/contract.h>
2#include <xrpl/json/json_reader.h>
3#include <xrpl/json/json_value.h>
4
5#include <algorithm>
6#include <cctype>
7#include <cstdint>
8#include <cstdio>
9#include <cstring>
10#include <istream>
11#include <stdexcept>
12#include <string>
13
14namespace Json {
15// Implementation of class Reader
16// ////////////////////////////////
17
/** Encode a Unicode code point as a UTF-8 byte sequence.

    Follows the bit layout described at http://en.wikipedia.org/wiki/UTF-8

    @param cp Code point to encode.
    @return The one- to four-byte encoding of `cp`, or an empty string when
            `cp` is greater than 0x10FFFF.

    @note Values in the surrogate range (0xD800-0xDFFF) are not rejected
          here; they take the ordinary three-byte branch.
*/
static std::string
codePointToUTF8(unsigned int cp)
{
    // Continuation byte (10xxxxxx) carrying the six bits of cp that begin
    // at bit position `shift`.
    auto const continuation = [cp](unsigned int shift) {
        return static_cast<char>(0x80 | (0x3f & (cp >> shift)));
    };

    std::string utf8;

    if (cp <= 0x7f)
    {
        // Plain ASCII: a single byte, 0xxxxxxx.
        utf8 += static_cast<char>(cp);
    }
    else if (cp <= 0x7FF)
    {
        // Two bytes: 110xxxxx 10xxxxxx.
        utf8 += static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
        utf8 += continuation(0);
    }
    else if (cp <= 0xFFFF)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        utf8 += static_cast<char>(0xE0 | (0xf & (cp >> 12)));
        utf8 += continuation(6);
        utf8 += continuation(0);
    }
    else if (cp <= 0x10FFFF)
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        utf8 += static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
        utf8 += continuation(12);
        utf8 += continuation(6);
        utf8 += continuation(0);
    }

    // Anything above 0x10FFFF falls through and yields an empty string.
    return utf8;
}
54
55// Class Reader
56// //////////////////////////////////////////////////////////////////
57
58bool
59Reader::parse(std::string const& document, Value& root)
60{
61 document_ = document;
62 char const* begin = document_.c_str();
63 char const* end = begin + document_.length();
64 return parse(begin, end, root);
65}
66
67bool
69{
70 // std::istream_iterator<char> begin(sin);
71 // std::istream_iterator<char> end;
72 // Those would allow streamed input from a file, if parse() were a
73 // template function.
74
75 // The whole stream is read into a local string; parse(doc, root) then
76 // copies that string into document_ before parsing it.
77 std::string doc;
78 std::getline(sin, doc, (char)EOF);
79 return parse(doc, root);
80}
81
82bool
83Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
84{
85 begin_ = beginDoc;
86 end_ = endDoc;
88 lastValueEnd_ = 0;
89 lastValue_ = 0;
90 errors_.clear();
91
92 while (!nodes_.empty())
93 nodes_.pop();
94
95 nodes_.push(&root);
96 bool const successful = readValue(0);
97 Token token{};
98 skipCommentTokens(token);
99
100 if (!root.isNull() && !root.isArray() && !root.isObject())
101 {
102 // Set error location to start of doc, ideally should be first token
103 // found in doc
104 token.type_ = tokenError;
105 token.start_ = beginDoc;
106 token.end_ = endDoc;
107 addError("A valid JSON document must be either an array or an object value.", token);
108 return false;
109 }
110
111 return successful;
112}
113
114bool
115Reader::readValue(unsigned depth)
116{
117 Token token{};
118 skipCommentTokens(token);
119 if (depth > nest_limit)
120 return addError("Syntax error: maximum nesting depth exceeded", token);
121 bool successful = true;
122
123 switch (token.type_)
124 {
125 case tokenObjectBegin:
126 successful = readObject(token, depth);
127 break;
128
129 case tokenArrayBegin:
130 successful = readArray(token, depth);
131 break;
132
133 case tokenInteger:
134 successful = decodeNumber(token);
135 break;
136
137 case tokenDouble:
138 successful = decodeDouble(token);
139 break;
140
141 case tokenString:
142 successful = decodeString(token);
143 break;
144
145 case tokenTrue:
146 currentValue() = true;
147 break;
148
149 case tokenFalse:
150 currentValue() = false;
151 break;
152
153 case tokenNull:
154 currentValue() = Value();
155 break;
156
157 default:
158 return addError("Syntax error: value, object or array expected.", token);
159 }
160
161 return successful;
162}
163
164void
166{
167 do
168 {
169 readToken(token);
170 } while (token.type_ == tokenComment);
171}
172
173bool
174Reader::expectToken(TokenType type, Token& token, char const* message)
175{
176 readToken(token);
177
178 if (token.type_ != type)
179 return addError(message, token);
180
181 return true;
182}
183
184bool
186{
187 skipSpaces();
188 token.start_ = current_;
189 Char const c = getNextChar();
190 bool ok = true;
191
192 switch (c)
193 {
194 case '{':
195 token.type_ = tokenObjectBegin;
196 break;
197
198 case '}':
199 token.type_ = tokenObjectEnd;
200 break;
201
202 case '[':
203 token.type_ = tokenArrayBegin;
204 break;
205
206 case ']':
207 token.type_ = tokenArrayEnd;
208 break;
209
210 case '"':
211 token.type_ = tokenString;
212 ok = readString();
213 break;
214
215 case '/':
216 token.type_ = tokenComment;
217 ok = readComment();
218 break;
219
220 case '0':
221 case '1':
222 case '2':
223 case '3':
224 case '4':
225 case '5':
226 case '6':
227 case '7':
228 case '8':
229 case '9':
230 case '-':
231 token.type_ = readNumber();
232 break;
233
234 case 't':
235 token.type_ = tokenTrue;
236 ok = match("rue", 3);
237 break;
238
239 case 'f':
240 token.type_ = tokenFalse;
241 ok = match("alse", 4); // cspell:disable-line
242 break;
243
244 case 'n':
245 token.type_ = tokenNull;
246 ok = match("ull", 3);
247 break;
248
249 case ',':
251 break;
252
253 case ':':
255 break;
256
257 case 0:
258 token.type_ = tokenEndOfStream;
259 break;
260
261 default:
262 ok = false;
263 break;
264 }
265
266 if (!ok)
267 token.type_ = tokenError;
268
269 token.end_ = current_;
270 return true;
271}
272
273void
275{
276 while (current_ != end_)
277 {
278 Char const c = *current_;
279
280 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
281 {
282 ++current_;
283 }
284 else
285 {
286 break;
287 }
288 }
289}
290
291bool
292Reader::match(Location pattern, int patternLength)
293{
294 if (end_ - current_ < patternLength)
295 return false;
296
297 int index = patternLength;
298
299 while ((index--) != 0)
300 {
301 if (current_[index] != pattern[index])
302 return false;
303 }
304
305 current_ += patternLength;
306 return true;
307}
308
309bool
311{
312 Char const c = getNextChar();
313
314 if (c == '*')
315 return readCStyleComment();
316
317 if (c == '/')
318 return readCppStyleComment();
319
320 return false;
321}
322
323bool
325{
326 while (current_ != end_)
327 {
328 Char const c = getNextChar();
329
330 if (c == '*' && *current_ == '/')
331 break;
332 }
333
334 return getNextChar() == '/';
335}
336
337bool
339{
340 while (current_ != end_)
341 {
342 Char const c = getNextChar();
343
344 if (c == '\r' || c == '\n')
345 break;
346 }
347
348 return true;
349}
350
353{
354 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
355
356 TokenType type = tokenInteger;
357
358 if (current_ != end_)
359 {
360 if (*current_ == '-')
361 ++current_;
362
363 while (current_ != end_)
364 {
365 if (std::isdigit(static_cast<unsigned char>(*current_)) == 0)
366 {
367 auto ret =
368 std::find(std::begin(extended_tokens), std::end(extended_tokens), *current_);
369
370 if (ret == std::end(extended_tokens))
371 break;
372
373 type = tokenDouble;
374 }
375
376 ++current_;
377 }
378 }
379
380 return type;
381}
382
383bool
385{
386 Char c = 0;
387
388 while (current_ != end_)
389 {
390 c = getNextChar();
391
392 if (c == '\\')
393 {
394 getNextChar();
395 }
396 else if (c == '"')
397 {
398 break;
399 }
400 }
401
402 return c == '"';
403}
404
405bool
406Reader::readObject(Token& tokenStart, unsigned depth)
407{
408 Token tokenName{};
409 std::string name;
411
412 while (readToken(tokenName))
413 {
414 bool initialTokenOk = true;
415
416 while (tokenName.type_ == tokenComment && initialTokenOk)
417 initialTokenOk = readToken(tokenName);
418
419 if (!initialTokenOk)
420 break;
421
422 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
423 return true;
424
425 if (tokenName.type_ != tokenString)
426 break;
427
428 name = "";
429
430 if (!decodeString(tokenName, name))
432
433 Token colon{};
434
435 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
436 {
437 return addErrorAndRecover(
438 "Missing ':' after object member name", colon, tokenObjectEnd);
439 }
440
441 // Reject duplicate names
442 if (currentValue().isMember(name))
443 return addError("Key '" + name + "' appears twice.", tokenName);
444
445 Value& value = currentValue()[name];
446 nodes_.push(&value);
447 bool const ok = readValue(depth + 1);
448 nodes_.pop();
449
450 if (!ok) // error already set
452
453 Token comma{};
454
455 if (!readToken(comma) ||
456 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
457 comma.type_ != tokenComment))
458 {
459 return addErrorAndRecover(
460 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
461 }
462
463 bool finalizeTokenOk = true;
464
465 while (comma.type_ == tokenComment && finalizeTokenOk)
466 finalizeTokenOk = readToken(comma);
467
468 if (comma.type_ == tokenObjectEnd)
469 return true;
470 }
471
472 return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
473}
474
475bool
476Reader::readArray(Token& tokenStart, unsigned depth)
477{
479 skipSpaces();
480
481 if (*current_ == ']') // empty array
482 {
483 Token endArray{};
484 readToken(endArray);
485 return true;
486 }
487
488 int index = 0;
489
490 while (true)
491 {
492 Value& value = currentValue()[index++];
493 nodes_.push(&value);
494 bool ok = readValue(depth + 1);
495 nodes_.pop();
496
497 if (!ok) // error already set
499
500 Token token{};
501 // Accept Comment after last item in the array.
502 ok = readToken(token);
503
504 while (token.type_ == tokenComment && ok)
505 {
506 ok = readToken(token);
507 }
508
509 bool const badTokenType =
510 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
511
512 if (!ok || badTokenType)
513 {
514 return addErrorAndRecover(
515 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
516 }
517
518 if (token.type_ == tokenArrayEnd)
519 break;
520 }
521
522 return true;
523}
524
525bool
527{
528 Location current = token.start_;
529 bool const isNegative = *current == '-';
530
531 if (isNegative)
532 ++current;
533
534 if (current == token.end_)
535 {
536 return addError(
537 "'" + std::string(token.start_, token.end_) + "' is not a valid number.", token);
538 }
539
540 // The existing Json integers are 32-bit so using a 64-bit value here avoids
541 // overflows in the conversion code below.
542 std::int64_t value = 0;
543
544 static_assert(
545 sizeof(value) > sizeof(Value::maxUInt),
546 "The JSON integer overflow logic will need to be reworked.");
547
548 while (current < token.end_ && (value <= Value::maxUInt))
549 {
550 Char const c = *current++;
551
552 if (c < '0' || c > '9')
553 {
554 return addError(
555 "'" + std::string(token.start_, token.end_) + "' is not a number.", token);
556 }
557
558 value = (value * 10) + (c - '0');
559 }
560
561 // More tokens left -> input is larger than largest possible return value
562 if (current != token.end_)
563 {
564 return addError(
565 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
566 }
567
568 if (isNegative)
569 {
570 value = -value;
571
572 if (value < Value::minInt || value > Value::maxInt)
573 {
574 return addError(
575 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
576 token);
577 }
578
579 currentValue() = static_cast<Value::Int>(value);
580 }
581 else
582 {
583 if (value > Value::maxUInt)
584 {
585 return addError(
586 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
587 token);
588 }
589
590 // If it's representable as a signed integer, construct it as one.
591 if (value <= Value::maxInt)
592 {
593 currentValue() = static_cast<Value::Int>(value);
594 }
595 else
596 {
597 currentValue() = static_cast<Value::UInt>(value);
598 }
599 }
600
601 return true;
602}
603
604bool
606{
607 double value = 0;
608 int const bufferSize = 32;
609 int count = 0;
610 int const length = int(token.end_ - token.start_);
611 // Sanity check to avoid buffer overflow exploits.
612 if (length < 0)
613 {
614 return addError("Unable to parse token length", token);
615 }
616 // Avoid using a string constant for the format control string given to
617 // sscanf, as this can cause hard to debug crashes on OS X. See here for
618 // more info:
619 //
620 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
621 char format[] = "%lf";
622 if (length <= bufferSize)
623 {
624 Char buffer[bufferSize + 1];
625 memcpy(buffer, token.start_, length);
626 buffer[length] = 0;
627 count = sscanf(buffer, format, &value);
628 }
629 else
630 {
631 std::string const buffer(token.start_, token.end_);
632 count = sscanf(buffer.c_str(), format, &value);
633 }
634 if (count != 1)
635 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
636 currentValue() = value;
637 return true;
638}
639
640bool
642{
643 std::string decoded;
644
645 if (!decodeString(token, decoded))
646 return false;
647
648 currentValue() = decoded;
649 return true;
650}
651
652bool
654{
655 decoded.reserve(token.end_ - token.start_ - 2);
656 Location current = token.start_ + 1; // skip '"'
657 Location end = token.end_ - 1; // do not include '"'
658
659 while (current != end)
660 {
661 Char const c = *current++;
662
663 if (c == '"')
664 {
665 break;
666 }
667 if (c == '\\')
668 {
669 if (current == end)
670 return addError("Empty escape sequence in string", token, current);
671
672 Char const escape = *current++;
673
674 switch (escape)
675 {
676 case '"':
677 decoded += '"';
678 break;
679
680 case '/':
681 decoded += '/';
682 break;
683
684 case '\\':
685 decoded += '\\';
686 break;
687
688 case 'b':
689 decoded += '\b';
690 break;
691
692 case 'f':
693 decoded += '\f';
694 break;
695
696 case 'n':
697 decoded += '\n';
698 break;
699
700 case 'r':
701 decoded += '\r';
702 break;
703
704 case 't':
705 decoded += '\t';
706 break;
707
708 case 'u': {
709 unsigned int unicode = 0;
710
711 if (!decodeUnicodeCodePoint(token, current, end, unicode))
712 return false;
713
714 decoded += codePointToUTF8(unicode);
715 }
716 break;
717
718 default:
719 return addError("Bad escape sequence in string", token, current);
720 }
721 }
722 else
723 {
724 decoded += c;
725 }
726 }
727
728 return true;
729}
730
731bool
732Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode)
733{
734 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
735 return false;
736
737 if (unicode >= 0xD800 && unicode <= 0xDBFF)
738 {
739 // surrogate pairs
740 if (end - current < 6)
741 {
742 return addError(
743 "additional six characters expected to parse unicode surrogate "
744 "pair.",
745 token,
746 current);
747 }
748
749 unsigned int surrogatePair = 0;
750
751 if (*current != '\\' || *(current + 1) != 'u')
752 {
753 return addError(
754 "expecting another \\u token to begin the second half of a unicode surrogate pair",
755 token,
756 current);
757 }
758
759 current += 2; // skip two characters checked above
760
761 if (!decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
762 return false;
763
764 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
765 }
766
767 return true;
768}
769
770bool
772 Token& token,
773 Location& current,
774 Location end,
775 unsigned int& unicode)
776{
777 if (end - current < 4)
778 {
779 return addError(
780 "Bad unicode escape sequence in string: four digits expected.", token, current);
781 }
782
783 unicode = 0;
784
785 for (int index = 0; index < 4; ++index)
786 {
787 Char const c = *current++;
788 unicode *= 16;
789
790 if (c >= '0' && c <= '9')
791 {
792 unicode += c - '0';
793 }
794 else if (c >= 'a' && c <= 'f')
795 {
796 unicode += c - 'a' + 10;
797 }
798 else if (c >= 'A' && c <= 'F')
799 {
800 unicode += c - 'A' + 10;
801 }
802 else
803 {
804 return addError(
805 "Bad unicode escape sequence in string: hexadecimal digit "
806 "expected.",
807 token,
808 current);
809 }
810 }
811
812 return true;
813}
814
815bool
816Reader::addError(std::string const& message, Token& token, Location extra)
817{
818 ErrorInfo info;
819 info.token_ = token;
820 info.message_ = message;
821 info.extra_ = extra;
822 errors_.push_back(info);
823 return false;
824}
825
826bool
828{
829 int const errorCount = int(errors_.size());
830 Token skip{};
831
832 while (true)
833 {
834 if (!readToken(skip))
835 errors_.resize(errorCount); // discard errors caused by recovery
836
837 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
838 break;
839 }
840
841 errors_.resize(errorCount);
842 return false;
843}
844
845bool
846Reader::addErrorAndRecover(std::string const& message, Token& token, TokenType skipUntilToken)
847{
848 addError(message, token);
849 return recoverFromError(skipUntilToken);
850}
851
852Value&
854{
855 return *(nodes_.top());
856}
857
860{
861 if (current_ == end_)
862 return 0;
863
864 return *current_++;
865}
866
867void
868Reader::getLocationLineAndColumn(Location location, int& line, int& column) const
869{
870 Location current = begin_;
871 Location lastLineStart = current;
872 line = 0;
873
874 while (current < location && current != end_)
875 {
876 Char const c = *current++;
877
878 if (c == '\r')
879 {
880 if (*current == '\n')
881 ++current;
882
883 lastLineStart = current;
884 ++line;
885 }
886 else if (c == '\n')
887 {
888 lastLineStart = current;
889 ++line;
890 }
891 }
892
893 // column & line start at 1
894 column = int(location - lastLineStart) + 1;
895 ++line;
896}
897
900{
901 int line = 0, column = 0;
902 getLocationLineAndColumn(location, line, column);
903 return "Line " + std::to_string(line) + ", Column " + std::to_string(column);
904}
905
908{
909 std::string formattedMessage;
910
911 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError)
912 {
913 ErrorInfo const& error = *itError;
914 formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
915 formattedMessage += " " + error.message_ + "\n";
916
917 if (error.extra_ != nullptr)
918 formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
919 }
920
921 return formattedMessage;
922}
923
926{
927 Json::Reader reader;
928 bool const ok = reader.parse(sin, root);
929
930 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
931 if (!ok)
932 xrpl::Throw<std::runtime_error>(reader.getFormattedErrorMessages());
933
934 return sin;
935}
936
937} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition json_reader.h:17
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
Location current_
Location begin_
bool match(Location pattern, int patternLength)
bool expectToken(TokenType type, Token &token, char const *message)
bool decodeNumber(Token &token)
Char const * Location
Definition json_reader.h:20
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
std::string getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
bool readCppStyleComment()
Location end_
static constexpr unsigned nest_limit
Definition json_reader.h:70
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
bool addError(std::string const &message, Token &token, Location extra=0)
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Represents a JSON value.
Definition json_value.h:130
Json::UInt UInt
Definition json_value.h:137
Json::Int Int
Definition json_value.h:138
static constexpr Int maxInt
Definition json_value.h:143
static constexpr UInt maxUInt
Definition json_value.h:144
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
T is_same_v
JSON (JavaScript Object Notation).
Definition json_errors.h:5
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition json_value.h:25
@ objectValue
object value (collection of name/value pairs).
Definition json_value.h:26
static std::string codePointToUTF8(unsigned int cp)
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)