rippled
Loading...
Searching...
No Matches
json_reader.cpp
1#include <xrpl/basics/contract.h>
2#include <xrpl/json/json_reader.h>
3#include <xrpl/json/json_value.h>
4
5#include <algorithm>
6#include <cctype>
7#include <cstdint>
8#include <cstdio>
9#include <cstring>
10#include <istream>
11#include <stdexcept>
12#include <string>
13
14namespace Json {
15// Implementation of class Reader
16// ////////////////////////////////
17
// Encode a Unicode code point as a UTF-8 byte sequence.
//
// Returns one to four bytes, or an empty string when `cp` is greater than
// 0x10FFFF. Values in the surrogate range (0xD800-0xDFFF) are not rejected
// here; they are emitted with the three-byte form like any other value
// below 0x10000.
static std::string
codePointToUTF8(unsigned int cp)
{
    // Layout based on the description at http://en.wikipedia.org/wiki/UTF-8

    // Continuation byte (10xxxxxx) carrying the low six bits of `bits`.
    // The OR is done on the unsigned value and cast once, so no branch
    // depends on an implicit int -> char narrowing.
    auto const trail = [](unsigned int bits) {
        return static_cast<char>(0x80 | (0x3f & bits));
    };

    std::string result;

    if (cp <= 0x7f)
    {
        result += static_cast<char>(cp);
    }
    else if (cp <= 0x7FF)
    {
        result += static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
        result += trail(cp);
    }
    else if (cp <= 0xFFFF)
    {
        result += static_cast<char>(0xE0 | (0xf & (cp >> 12)));
        result += trail(cp >> 6);
        result += trail(cp);
    }
    else if (cp <= 0x10FFFF)
    {
        result += static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
        result += trail(cp >> 12);
        result += trail(cp >> 6);
        result += trail(cp);
    }

    return result;
}
54
55// Class Reader
56// //////////////////////////////////////////////////////////////////
57
58bool
59Reader::parse(std::string const& document, Value& root)
60{
61 document_ = document;
62 char const* begin = document_.c_str();
63 char const* end = begin + document_.length();
64 return parse(begin, end, root);
65}
66
67bool
69{
70 // std::istream_iterator<char> begin(sin);
71 // std::istream_iterator<char> end;
72 // Those would allow streamed input from a file, if parse() were a
73 // template function.
74
75 // Since std::string is reference-counted, this at least does not
76 // create an extra copy.
77 std::string doc;
78 std::getline(sin, doc, (char)EOF);
79 return parse(doc, root);
80}
81
82bool
83Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
84{
85 begin_ = beginDoc;
86 end_ = endDoc;
88 lastValueEnd_ = 0;
89 lastValue_ = 0;
90 errors_.clear();
91
92 while (!nodes_.empty())
93 nodes_.pop();
94
95 nodes_.push(&root);
96 bool successful = readValue(0);
97 Token token;
98 skipCommentTokens(token);
99
100 if (!root.isNull() && !root.isArray() && !root.isObject())
101 {
102 // Set error location to start of doc, ideally should be first token
103 // found in doc
104 token.type_ = tokenError;
105 token.start_ = beginDoc;
106 token.end_ = endDoc;
107 addError(
108 "A valid JSON document must be either an array or an object value.",
109 token);
110 return false;
111 }
112
113 return successful;
114}
115
116bool
117Reader::readValue(unsigned depth)
118{
119 Token token;
120 skipCommentTokens(token);
121 if (depth > nest_limit)
122 return addError("Syntax error: maximum nesting depth exceeded", token);
123 bool successful = true;
124
125 switch (token.type_)
126 {
127 case tokenObjectBegin:
128 successful = readObject(token, depth);
129 break;
130
131 case tokenArrayBegin:
132 successful = readArray(token, depth);
133 break;
134
135 case tokenInteger:
136 successful = decodeNumber(token);
137 break;
138
139 case tokenDouble:
140 successful = decodeDouble(token);
141 break;
142
143 case tokenString:
144 successful = decodeString(token);
145 break;
146
147 case tokenTrue:
148 currentValue() = true;
149 break;
150
151 case tokenFalse:
152 currentValue() = false;
153 break;
154
155 case tokenNull:
156 currentValue() = Value();
157 break;
158
159 default:
160 return addError(
161 "Syntax error: value, object or array expected.", token);
162 }
163
164 return successful;
165}
166
167void
169{
170 do
171 {
172 readToken(token);
173 } while (token.type_ == tokenComment);
174}
175
176bool
177Reader::expectToken(TokenType type, Token& token, char const* message)
178{
179 readToken(token);
180
181 if (token.type_ != type)
182 return addError(message, token);
183
184 return true;
185}
186
187bool
189{
190 skipSpaces();
191 token.start_ = current_;
192 Char c = getNextChar();
193 bool ok = true;
194
195 switch (c)
196 {
197 case '{':
198 token.type_ = tokenObjectBegin;
199 break;
200
201 case '}':
202 token.type_ = tokenObjectEnd;
203 break;
204
205 case '[':
206 token.type_ = tokenArrayBegin;
207 break;
208
209 case ']':
210 token.type_ = tokenArrayEnd;
211 break;
212
213 case '"':
214 token.type_ = tokenString;
215 ok = readString();
216 break;
217
218 case '/':
219 token.type_ = tokenComment;
220 ok = readComment();
221 break;
222
223 case '0':
224 case '1':
225 case '2':
226 case '3':
227 case '4':
228 case '5':
229 case '6':
230 case '7':
231 case '8':
232 case '9':
233 case '-':
234 token.type_ = readNumber();
235 break;
236
237 case 't':
238 token.type_ = tokenTrue;
239 ok = match("rue", 3);
240 break;
241
242 case 'f':
243 token.type_ = tokenFalse;
244 ok = match("alse", 4);
245 break;
246
247 case 'n':
248 token.type_ = tokenNull;
249 ok = match("ull", 3);
250 break;
251
252 case ',':
254 break;
255
256 case ':':
258 break;
259
260 case 0:
261 token.type_ = tokenEndOfStream;
262 break;
263
264 default:
265 ok = false;
266 break;
267 }
268
269 if (!ok)
270 token.type_ = tokenError;
271
272 token.end_ = current_;
273 return true;
274}
275
276void
278{
279 while (current_ != end_)
280 {
281 Char c = *current_;
282
283 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
284 ++current_;
285 else
286 break;
287 }
288}
289
290bool
291Reader::match(Location pattern, int patternLength)
292{
293 if (end_ - current_ < patternLength)
294 return false;
295
296 int index = patternLength;
297
298 while (index--)
299 if (current_[index] != pattern[index])
300 return false;
301
302 current_ += patternLength;
303 return true;
304}
305
306bool
308{
309 Char c = getNextChar();
310
311 if (c == '*')
312 return readCStyleComment();
313
314 if (c == '/')
315 return readCppStyleComment();
316
317 return false;
318}
319
320bool
322{
323 while (current_ != end_)
324 {
325 Char c = getNextChar();
326
327 if (c == '*' && *current_ == '/')
328 break;
329 }
330
331 return getNextChar() == '/';
332}
333
334bool
336{
337 while (current_ != end_)
338 {
339 Char c = getNextChar();
340
341 if (c == '\r' || c == '\n')
342 break;
343 }
344
345 return true;
346}
347
350{
351 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
352
353 TokenType type = tokenInteger;
354
355 if (current_ != end_)
356 {
357 if (*current_ == '-')
358 ++current_;
359
360 while (current_ != end_)
361 {
362 if (!std::isdigit(static_cast<unsigned char>(*current_)))
363 {
364 auto ret = std::find(
365 std::begin(extended_tokens),
366 std::end(extended_tokens),
367 *current_);
368
369 if (ret == std::end(extended_tokens))
370 break;
371
372 type = tokenDouble;
373 }
374
375 ++current_;
376 }
377 }
378
379 return type;
380}
381
382bool
384{
385 Char c = 0;
386
387 while (current_ != end_)
388 {
389 c = getNextChar();
390
391 if (c == '\\')
392 getNextChar();
393 else if (c == '"')
394 break;
395 }
396
397 return c == '"';
398}
399
400bool
401Reader::readObject(Token& tokenStart, unsigned depth)
402{
403 Token tokenName;
404 std::string name;
406
407 while (readToken(tokenName))
408 {
409 bool initialTokenOk = true;
410
411 while (tokenName.type_ == tokenComment && initialTokenOk)
412 initialTokenOk = readToken(tokenName);
413
414 if (!initialTokenOk)
415 break;
416
417 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
418 return true;
419
420 if (tokenName.type_ != tokenString)
421 break;
422
423 name = "";
424
425 if (!decodeString(tokenName, name))
427
428 Token colon;
429
430 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
431 {
432 return addErrorAndRecover(
433 "Missing ':' after object member name", colon, tokenObjectEnd);
434 }
435
436 // Reject duplicate names
437 if (currentValue().isMember(name))
438 return addError("Key '" + name + "' appears twice.", tokenName);
439
440 Value& value = currentValue()[name];
441 nodes_.push(&value);
442 bool ok = readValue(depth + 1);
443 nodes_.pop();
444
445 if (!ok) // error already set
447
448 Token comma;
449
450 if (!readToken(comma) ||
451 (comma.type_ != tokenObjectEnd &&
452 comma.type_ != tokenArraySeparator && comma.type_ != tokenComment))
453 {
454 return addErrorAndRecover(
455 "Missing ',' or '}' in object declaration",
456 comma,
458 }
459
460 bool finalizeTokenOk = true;
461
462 while (comma.type_ == tokenComment && finalizeTokenOk)
463 finalizeTokenOk = readToken(comma);
464
465 if (comma.type_ == tokenObjectEnd)
466 return true;
467 }
468
469 return addErrorAndRecover(
470 "Missing '}' or object member name", tokenName, tokenObjectEnd);
471}
472
473bool
474Reader::readArray(Token& tokenStart, unsigned depth)
475{
477 skipSpaces();
478
479 if (*current_ == ']') // empty array
480 {
481 Token endArray;
482 readToken(endArray);
483 return true;
484 }
485
486 int index = 0;
487
488 while (true)
489 {
490 Value& value = currentValue()[index++];
491 nodes_.push(&value);
492 bool ok = readValue(depth + 1);
493 nodes_.pop();
494
495 if (!ok) // error already set
497
498 Token token;
499 // Accept Comment after last item in the array.
500 ok = readToken(token);
501
502 while (token.type_ == tokenComment && ok)
503 {
504 ok = readToken(token);
505 }
506
507 bool badTokenType =
508 (token.type_ != tokenArraySeparator &&
509 token.type_ != tokenArrayEnd);
510
511 if (!ok || badTokenType)
512 {
513 return addErrorAndRecover(
514 "Missing ',' or ']' in array declaration",
515 token,
517 }
518
519 if (token.type_ == tokenArrayEnd)
520 break;
521 }
522
523 return true;
524}
525
526bool
528{
529 Location current = token.start_;
530 bool isNegative = *current == '-';
531
532 if (isNegative)
533 ++current;
534
535 if (current == token.end_)
536 {
537 return addError(
538 "'" + std::string(token.start_, token.end_) +
539 "' is not a valid number.",
540 token);
541 }
542
543 // The existing Json integers are 32-bit so using a 64-bit value here avoids
544 // overflows in the conversion code below.
545 std::int64_t value = 0;
546
547 static_assert(
548 sizeof(value) > sizeof(Value::maxUInt),
549 "The JSON integer overflow logic will need to be reworked.");
550
551 while (current < token.end_ && (value <= Value::maxUInt))
552 {
553 Char c = *current++;
554
555 if (c < '0' || c > '9')
556 {
557 return addError(
558 "'" + std::string(token.start_, token.end_) +
559 "' is not a number.",
560 token);
561 }
562
563 value = (value * 10) + (c - '0');
564 }
565
566 // More tokens left -> input is larger than largest possible return value
567 if (current != token.end_)
568 {
569 return addError(
570 "'" + std::string(token.start_, token.end_) +
571 "' exceeds the allowable range.",
572 token);
573 }
574
575 if (isNegative)
576 {
577 value = -value;
578
579 if (value < Value::minInt || value > Value::maxInt)
580 {
581 return addError(
582 "'" + std::string(token.start_, token.end_) +
583 "' exceeds the allowable range.",
584 token);
585 }
586
587 currentValue() = static_cast<Value::Int>(value);
588 }
589 else
590 {
591 if (value > Value::maxUInt)
592 {
593 return addError(
594 "'" + std::string(token.start_, token.end_) +
595 "' exceeds the allowable range.",
596 token);
597 }
598
599 // If it's representable as a signed integer, construct it as one.
600 if (value <= Value::maxInt)
601 currentValue() = static_cast<Value::Int>(value);
602 else
603 currentValue() = static_cast<Value::UInt>(value);
604 }
605
606 return true;
607}
608
609bool
611{
612 double value = 0;
613 int const bufferSize = 32;
614 int count;
615 int length = int(token.end_ - token.start_);
616 // Sanity check to avoid buffer overflow exploits.
617 if (length < 0)
618 {
619 return addError("Unable to parse token length", token);
620 }
621 // Avoid using a string constant for the format control string given to
622 // sscanf, as this can cause hard to debug crashes on OS X. See here for
623 // more info:
624 //
625 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
626 char format[] = "%lf";
627 if (length <= bufferSize)
628 {
629 Char buffer[bufferSize + 1];
630 memcpy(buffer, token.start_, length);
631 buffer[length] = 0;
632 count = sscanf(buffer, format, &value);
633 }
634 else
635 {
636 std::string buffer(token.start_, token.end_);
637 count = sscanf(buffer.c_str(), format, &value);
638 }
639 if (count != 1)
640 return addError(
641 "'" + std::string(token.start_, token.end_) + "' is not a number.",
642 token);
643 currentValue() = value;
644 return true;
645}
646
647bool
649{
650 std::string decoded;
651
652 if (!decodeString(token, decoded))
653 return false;
654
655 currentValue() = decoded;
656 return true;
657}
658
659bool
661{
662 decoded.reserve(token.end_ - token.start_ - 2);
663 Location current = token.start_ + 1; // skip '"'
664 Location end = token.end_ - 1; // do not include '"'
665
666 while (current != end)
667 {
668 Char c = *current++;
669
670 if (c == '"')
671 break;
672 else if (c == '\\')
673 {
674 if (current == end)
675 return addError(
676 "Empty escape sequence in string", token, current);
677
678 Char escape = *current++;
679
680 switch (escape)
681 {
682 case '"':
683 decoded += '"';
684 break;
685
686 case '/':
687 decoded += '/';
688 break;
689
690 case '\\':
691 decoded += '\\';
692 break;
693
694 case 'b':
695 decoded += '\b';
696 break;
697
698 case 'f':
699 decoded += '\f';
700 break;
701
702 case 'n':
703 decoded += '\n';
704 break;
705
706 case 'r':
707 decoded += '\r';
708 break;
709
710 case 't':
711 decoded += '\t';
712 break;
713
714 case 'u': {
715 unsigned int unicode;
716
717 if (!decodeUnicodeCodePoint(token, current, end, unicode))
718 return false;
719
720 decoded += codePointToUTF8(unicode);
721 }
722 break;
723
724 default:
725 return addError(
726 "Bad escape sequence in string", token, current);
727 }
728 }
729 else
730 {
731 decoded += c;
732 }
733 }
734
735 return true;
736}
737
738bool
740 Token& token,
741 Location& current,
742 Location end,
743 unsigned int& unicode)
744{
745 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
746 return false;
747
748 if (unicode >= 0xD800 && unicode <= 0xDBFF)
749 {
750 // surrogate pairs
751 if (end - current < 6)
752 return addError(
753 "additional six characters expected to parse unicode surrogate "
754 "pair.",
755 token,
756 current);
757
758 unsigned int surrogatePair;
759
760 if (*(current++) == '\\' && *(current++) == 'u')
761 {
762 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
763 {
764 unicode = 0x10000 + ((unicode & 0x3FF) << 10) +
765 (surrogatePair & 0x3FF);
766 }
767 else
768 return false;
769 }
770 else
771 return addError(
772 "expecting another \\u token to begin the second half of a "
773 "unicode surrogate pair",
774 token,
775 current);
776 }
777
778 return true;
779}
780
781bool
783 Token& token,
784 Location& current,
785 Location end,
786 unsigned int& unicode)
787{
788 if (end - current < 4)
789 return addError(
790 "Bad unicode escape sequence in string: four digits expected.",
791 token,
792 current);
793
794 unicode = 0;
795
796 for (int index = 0; index < 4; ++index)
797 {
798 Char c = *current++;
799 unicode *= 16;
800
801 if (c >= '0' && c <= '9')
802 unicode += c - '0';
803 else if (c >= 'a' && c <= 'f')
804 unicode += c - 'a' + 10;
805 else if (c >= 'A' && c <= 'F')
806 unicode += c - 'A' + 10;
807 else
808 return addError(
809 "Bad unicode escape sequence in string: hexadecimal digit "
810 "expected.",
811 token,
812 current);
813 }
814
815 return true;
816}
817
818bool
819Reader::addError(std::string const& message, Token& token, Location extra)
820{
821 ErrorInfo info;
822 info.token_ = token;
823 info.message_ = message;
824 info.extra_ = extra;
825 errors_.push_back(info);
826 return false;
827}
828
829bool
831{
832 int errorCount = int(errors_.size());
833 Token skip;
834
835 while (true)
836 {
837 if (!readToken(skip))
838 errors_.resize(errorCount); // discard errors caused by recovery
839
840 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
841 break;
842 }
843
844 errors_.resize(errorCount);
845 return false;
846}
847
848bool
850 std::string const& message,
851 Token& token,
852 TokenType skipUntilToken)
853{
854 addError(message, token);
855 return recoverFromError(skipUntilToken);
856}
857
858Value&
860{
861 return *(nodes_.top());
862}
863
866{
867 if (current_ == end_)
868 return 0;
869
870 return *current_++;
871}
872
873void
874Reader::getLocationLineAndColumn(Location location, int& line, int& column)
875 const
876{
877 Location current = begin_;
878 Location lastLineStart = current;
879 line = 0;
880
881 while (current < location && current != end_)
882 {
883 Char c = *current++;
884
885 if (c == '\r')
886 {
887 if (*current == '\n')
888 ++current;
889
890 lastLineStart = current;
891 ++line;
892 }
893 else if (c == '\n')
894 {
895 lastLineStart = current;
896 ++line;
897 }
898 }
899
900 // column & line start at 1
901 column = int(location - lastLineStart) + 1;
902 ++line;
903}
904
907{
908 int line, column;
909 getLocationLineAndColumn(location, line, column);
910 return "Line " + std::to_string(line) + ", Column " +
911 std::to_string(column);
912}
913
916{
917 std::string formattedMessage;
918
919 for (Errors::const_iterator itError = errors_.begin();
920 itError != errors_.end();
921 ++itError)
922 {
923 ErrorInfo const& error = *itError;
924 formattedMessage +=
925 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
926 formattedMessage += " " + error.message_ + "\n";
927
928 if (error.extra_)
929 formattedMessage += "See " +
930 getLocationLineAndColumn(error.extra_) + " for detail.\n";
931 }
932
933 return formattedMessage;
934}
935
938{
939 Json::Reader reader;
940 bool ok = reader.parse(sin, root);
941
942 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
943 if (!ok)
944 ripple::Throw<std::runtime_error>(reader.getFormatedErrorMessages());
945
946 return sin;
947}
948
949} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition json_reader.h:20
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
std::string getFormatedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
Location current_
Location begin_
bool match(Location pattern, int patternLength)
bool expectToken(TokenType type, Token &token, char const *message)
bool decodeNumber(Token &token)
Char const * Location
Definition json_reader.h:23
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
bool readCppStyleComment()
Location end_
static constexpr unsigned nest_limit
Definition json_reader.h:73
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
bool addError(std::string const &message, Token &token, Location extra=0)
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Represents a JSON value.
Definition json_value.h:130
Json::UInt UInt
Definition json_value.h:137
static UInt const maxUInt
Definition json_value.h:144
Json::Int Int
Definition json_value.h:138
static Int const maxInt
Definition json_value.h:143
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
T is_same_v
JSON (JavaScript Object Notation).
Definition json_errors.h:6
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition json_value.h:25
@ objectValue
object value (collection of name/value pairs).
Definition json_value.h:26
static std::string codePointToUTF8(unsigned int cp)
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)