xrpld
Loading...
Searching...
No Matches
json_reader.cpp
1#include <xrpl/json/json_reader.h>
2
3#include <xrpl/basics/contract.h>
4#include <xrpl/json/json_value.h>
5
6#include <algorithm>
7#include <cctype>
8#include <cstdint>
9#include <cstdio>
10#include <cstring>
11#include <istream>
12#include <stdexcept>
13#include <string>
14
15namespace json {
16// Implementation of class Reader
17// ////////////////////////////////
18
// Encode a single Unicode code point as a UTF-8 byte sequence.
//
// @param cp  Code point to encode.
// @return    One to four bytes of UTF-8 when cp <= 0x10FFFF; an empty string
//            for any larger value. Surrogate values are not special-cased:
//            they go through the generic three-byte layout.
static std::string
codePointToUTF8(unsigned int cp)
{
    // Byte layout follows http://en.wikipedia.org/wiki/UTF-8
    //
    // Each byte is assembled as an unsigned value and converted to char
    // exactly once, so no branch depends on an implicit int -> char
    // narrowing of an already-promoted expression.
    auto const leadByte = [cp](unsigned int marker, unsigned int mask, unsigned int shift) {
        return static_cast<char>(marker | (mask & (cp >> shift)));
    };
    auto const continuationByte = [cp](unsigned int shift) {
        return static_cast<char>(0x80u | (0x3fu & (cp >> shift)));
    };

    std::string result;

    if (cp <= 0x7f)
    {
        // Plain ASCII: the code point is the byte.
        result += static_cast<char>(cp);
    }
    else if (cp <= 0x7FF)
    {
        result += leadByte(0xC0u, 0x1fu, 6);
        result += continuationByte(0);
    }
    else if (cp <= 0xFFFF)
    {
        result += leadByte(0xE0u, 0xfu, 12);
        result += continuationByte(6);
        result += continuationByte(0);
    }
    else if (cp <= 0x10FFFF)
    {
        result += leadByte(0xF0u, 0x7u, 18);
        result += continuationByte(12);
        result += continuationByte(6);
        result += continuationByte(0);
    }
    // Anything above 0x10FFFF is outside Unicode: return the empty string.

    return result;
}
55
56// Class Reader
57// //////////////////////////////////////////////////////////////////
58
59bool
60Reader::parse(std::string const& document, Value& root)
61{
62 document_ = document;
63 char const* begin = document_.c_str();
64 char const* end = begin + document_.length();
65 return parse(begin, end, root);
66}
67
68bool
70{
71 // std::istream_iterator<char> begin(sin);
72 // std::istream_iterator<char> end;
73 // Those would allow streamed input from a file, if parse() were a
74 // template function.
75
76 // Since std::string is reference-counted, this at least does not
77 // create an extra copy.
78 std::string doc;
79 std::getline(sin, doc, (char)EOF);
80 return parse(doc, root);
81}
82
83bool
84Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
85{
86 begin_ = beginDoc;
87 end_ = endDoc;
89 lastValueEnd_ = 0;
90 lastValue_ = 0;
91 errors_.clear();
92
93 while (!nodes_.empty())
94 nodes_.pop();
95
96 nodes_.push(&root);
97 bool const successful = readValue(0);
98 Token token{};
99 skipCommentTokens(token);
100
101 if (!root.isNull() && !root.isArray() && !root.isObject())
102 {
103 // Set error location to start of doc, ideally should be first token
104 // found in doc
105 token.type = TokenType::Error;
106 token.start = beginDoc;
107 token.end = endDoc;
108 addError("A valid JSON document must be either an array or an object value.", token);
109 return false;
110 }
111
112 return successful;
113}
114
115bool
116Reader::readValue(unsigned depth)
117{
118 Token token{};
119 skipCommentTokens(token);
120 if (depth > kNestLimit)
121 return addError("Syntax error: maximum nesting depth exceeded", token);
122 bool successful = true;
123
124 switch (token.type)
125 {
127 successful = readObject(token, depth);
128 break;
129
131 successful = readArray(token, depth);
132 break;
133
135 successful = decodeNumber(token);
136 break;
137
139 successful = decodeDouble(token);
140 break;
141
143 successful = decodeString(token);
144 break;
145
146 case TokenType::True:
147 currentValue() = true;
148 break;
149
150 case TokenType::False:
151 currentValue() = false;
152 break;
153
154 case TokenType::Null:
155 currentValue() = Value();
156 break;
157
158 default:
159 return addError("Syntax error: value, object or array expected.", token);
160 }
161
162 return successful;
163}
164
165void
167{
168 do
169 {
170 readToken(token);
171 } while (token.type == TokenType::Comment);
172}
173
174bool
175Reader::expectToken(TokenType type, Token& token, char const* message)
176{
177 readToken(token);
178
179 if (token.type != type)
180 return addError(message, token);
181
182 return true;
183}
184
185bool
187{
188 skipSpaces();
189 token.start = current_;
190 Char const c = getNextChar();
191 bool ok = true;
192
193 switch (c)
194 {
195 case '{':
197 break;
198
199 case '}':
201 break;
202
203 case '[':
205 break;
206
207 case ']':
209 break;
210
211 case '"':
212 token.type = TokenType::String;
213 ok = readString();
214 break;
215
216 case '/':
217 token.type = TokenType::Comment;
218 ok = readComment();
219 break;
220
221 case '0':
222 case '1':
223 case '2':
224 case '3':
225 case '4':
226 case '5':
227 case '6':
228 case '7':
229 case '8':
230 case '9':
231 case '-':
232 token.type = readNumber();
233 break;
234
235 case 't':
236 token.type = TokenType::True;
237 ok = match("rue", 3);
238 break;
239
240 case 'f':
241 token.type = TokenType::False;
242 ok = match("alse", 4); // cspell:disable-line
243 break;
244
245 case 'n':
246 token.type = TokenType::Null;
247 ok = match("ull", 3);
248 break;
249
250 case ',':
252 break;
253
254 case ':':
256 break;
257
258 case 0:
260 break;
261
262 default:
263 ok = false;
264 break;
265 }
266
267 if (!ok)
268 token.type = TokenType::Error;
269
270 token.end = current_;
271 return true;
272}
273
274void
276{
277 while (current_ != end_)
278 {
279 Char const c = *current_;
280
281 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
282 {
283 ++current_;
284 }
285 else
286 {
287 break;
288 }
289 }
290}
291
292bool
293Reader::match(Location pattern, int patternLength)
294{
295 if (end_ - current_ < patternLength)
296 return false;
297
298 int index = patternLength;
299
300 while ((index--) != 0)
301 {
302 if (current_[index] != pattern[index])
303 return false;
304 }
305
306 current_ += patternLength;
307 return true;
308}
309
310bool
312{
313 Char const c = getNextChar();
314
315 if (c == '*')
316 return readCStyleComment();
317
318 if (c == '/')
319 return readCppStyleComment();
320
321 return false;
322}
323
324bool
326{
327 while (current_ != end_)
328 {
329 Char const c = getNextChar();
330
331 if (c == '*' && *current_ == '/')
332 break;
333 }
334
335 return getNextChar() == '/';
336}
337
338bool
340{
341 while (current_ != end_)
342 {
343 Char const c = getNextChar();
344
345 if (c == '\r' || c == '\n')
346 break;
347 }
348
349 return true;
350}
351
354{
355 static char const kExtendedTokens[] = {'.', 'e', 'E', '+', '-'};
356
358
359 if (current_ != end_)
360 {
361 if (*current_ == '-')
362 ++current_;
363
364 while (current_ != end_)
365 {
366 if (std::isdigit(static_cast<unsigned char>(*current_)) == 0)
367 {
368 auto ret = std::ranges::find(kExtendedTokens, *current_);
369
370 if (ret == std::end(kExtendedTokens))
371 break;
372
373 type = TokenType::Double;
374 }
375
376 ++current_;
377 }
378 }
379
380 return type;
381}
382
383bool
385{
386 Char c = 0;
387
388 while (current_ != end_)
389 {
390 c = getNextChar();
391
392 if (c == '\\')
393 {
394 getNextChar();
395 }
396 else if (c == '"')
397 {
398 break;
399 }
400 }
401
402 return c == '"';
403}
404
405bool
406Reader::readObject(Token& tokenStart, unsigned depth)
407{
408 Token tokenName{};
409 std::string name;
411
412 while (readToken(tokenName))
413 {
414 bool initialTokenOk = true;
415
416 while (tokenName.type == TokenType::Comment && initialTokenOk)
417 initialTokenOk = readToken(tokenName);
418
419 if (!initialTokenOk)
420 break;
421
422 if (tokenName.type == TokenType::ObjectEnd && name.empty()) // empty object
423 return true;
424
425 if (tokenName.type != TokenType::String)
426 break;
427
428 name = "";
429
430 if (!decodeString(tokenName, name))
432
433 Token colon{};
434
435 if (!readToken(colon) || colon.type != TokenType::MemberSeparator)
436 {
437 return addErrorAndRecover(
438 "Missing ':' after object member name", colon, TokenType::ObjectEnd);
439 }
440
441 // Reject duplicate names
442 if (currentValue().isMember(name))
443 return addError("Key '" + name + "' appears twice.", tokenName);
444
445 Value& value = currentValue()[name];
446 nodes_.push(&value);
447 bool const ok = readValue(depth + 1);
448 nodes_.pop();
449
450 if (!ok) // error already set
452
453 Token comma{};
454
455 if (!readToken(comma) ||
457 comma.type != TokenType::Comment))
458 {
459 return addErrorAndRecover(
460 "Missing ',' or '}' in object declaration", comma, TokenType::ObjectEnd);
461 }
462
463 bool finalizeTokenOk = true;
464
465 while (comma.type == TokenType::Comment && finalizeTokenOk)
466 finalizeTokenOk = readToken(comma);
467
468 if (comma.type == TokenType::ObjectEnd)
469 return true;
470 }
471
472 return addErrorAndRecover("Missing '}' or object member name", tokenName, TokenType::ObjectEnd);
473}
474
475bool
476Reader::readArray(Token& tokenStart, unsigned depth)
477{
479 skipSpaces();
480
481 if (*current_ == ']') // empty array
482 {
483 Token endArray{};
484 readToken(endArray);
485 return true;
486 }
487
488 int index = 0;
489
490 while (true)
491 {
492 Value& value = currentValue()[index++];
493 nodes_.push(&value);
494 bool ok = readValue(depth + 1);
495 nodes_.pop();
496
497 if (!ok) // error already set
499
500 Token token{};
501 // Accept Comment after last item in the array.
502 ok = readToken(token);
503
504 while (token.type == TokenType::Comment && ok)
505 {
506 ok = readToken(token);
507 }
508
509 bool const badTokenType =
511
512 if (!ok || badTokenType)
513 {
514 return addErrorAndRecover(
515 "Missing ',' or ']' in array declaration", token, TokenType::ArrayEnd);
516 }
517
518 if (token.type == TokenType::ArrayEnd)
519 break;
520 }
521
522 return true;
523}
524
525bool
527{
528 Location current = token.start;
529 bool const isNegative = *current == '-';
530
531 if (isNegative)
532 ++current;
533
534 if (current == token.end)
535 {
536 return addError(
537 "'" + std::string(token.start, token.end) + "' is not a valid number.", token);
538 }
539
540 // The existing Json integers are 32-bit so using a 64-bit value here avoids
541 // overflows in the conversion code below.
542 std::int64_t value = 0;
543
544 static_assert(
545 sizeof(value) > sizeof(Value::kMaxUInt),
546 "The JSON integer overflow logic will need to be reworked.");
547
548 while (current < token.end && (value <= Value::kMaxUInt))
549 {
550 Char const c = *current++;
551
552 if (c < '0' || c > '9')
553 {
554 return addError(
555 "'" + std::string(token.start, token.end) + "' is not a number.", token);
556 }
557
558 value = (value * 10) + (c - '0');
559 }
560
561 // More tokens left -> input is larger than largest possible return value
562 if (current != token.end)
563 {
564 return addError(
565 "'" + std::string(token.start, token.end) + "' exceeds the allowable range.", token);
566 }
567
568 if (isNegative)
569 {
570 value = -value;
571
572 if (value < Value::kMinInt || value > Value::kMaxInt)
573 {
574 return addError(
575 "'" + std::string(token.start, token.end) + "' exceeds the allowable range.",
576 token);
577 }
578
579 currentValue() = static_cast<Value::Int>(value);
580 }
581 else
582 {
583 if (value > Value::kMaxUInt)
584 {
585 return addError(
586 "'" + std::string(token.start, token.end) + "' exceeds the allowable range.",
587 token);
588 }
589
590 // If it's representable as a signed integer, construct it as one.
591 if (value <= Value::kMaxInt)
592 {
593 currentValue() = static_cast<Value::Int>(value);
594 }
595 else
596 {
597 currentValue() = static_cast<Value::UInt>(value);
598 }
599 }
600
601 return true;
602}
603
604bool
606{
607 double value = 0;
608 int const bufferSize = 32;
609 int count = 0;
610 int const length = int(token.end - token.start);
611 // Sanity check to avoid buffer overflow exploits.
612 if (length < 0)
613 {
614 return addError("Unable to parse token length", token);
615 }
616 // Avoid using a string constant for the format control string given to
617 // sscanf, as this can cause hard to debug crashes on OS X. See here for
618 // more info:
619 //
620 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
621 char format[] = "%lf";
622 if (length <= bufferSize)
623 {
624 Char buffer[bufferSize + 1];
625 memcpy(buffer, token.start, length);
626 buffer[length] = 0;
627 count = sscanf(buffer, format, &value);
628 }
629 else
630 {
631 std::string const buffer(token.start, token.end);
632 count = sscanf(buffer.c_str(), format, &value);
633 }
634 if (count != 1)
635 return addError("'" + std::string(token.start, token.end) + "' is not a number.", token);
636 currentValue() = value;
637 return true;
638}
639
640bool
642{
643 std::string decoded;
644
645 if (!decodeString(token, decoded))
646 return false;
647
648 currentValue() = decoded;
649 return true;
650}
651
652bool
654{
655 decoded.reserve(token.end - token.start - 2);
656 Location current = token.start + 1; // skip '"'
657 Location end = token.end - 1; // do not include '"'
658
659 while (current != end)
660 {
661 Char const c = *current++;
662
663 if (c == '"')
664 {
665 break;
666 }
667 if (c == '\\')
668 {
669 if (current == end)
670 return addError("Empty escape sequence in string", token, current);
671
672 Char const escape = *current++;
673
674 switch (escape)
675 {
676 case '"':
677 decoded += '"';
678 break;
679
680 case '/':
681 decoded += '/';
682 break;
683
684 case '\\':
685 decoded += '\\';
686 break;
687
688 case 'b':
689 decoded += '\b';
690 break;
691
692 case 'f':
693 decoded += '\f';
694 break;
695
696 case 'n':
697 decoded += '\n';
698 break;
699
700 case 'r':
701 decoded += '\r';
702 break;
703
704 case 't':
705 decoded += '\t';
706 break;
707
708 case 'u': {
709 unsigned int unicode = 0;
710
711 if (!decodeUnicodeCodePoint(token, current, end, unicode))
712 return false;
713
714 decoded += codePointToUTF8(unicode);
715 }
716 break;
717
718 default:
719 return addError("Bad escape sequence in string", token, current);
720 }
721 }
722 else
723 {
724 decoded += c;
725 }
726 }
727
728 return true;
729}
730
731bool
732Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode)
733{
734 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
735 return false;
736
737 if (unicode >= 0xD800 && unicode <= 0xDBFF)
738 {
739 // surrogate pairs
740 if (end - current < 6)
741 {
742 return addError(
743 "additional six characters expected to parse unicode surrogate "
744 "pair.",
745 token,
746 current);
747 }
748
749 unsigned int surrogatePair = 0;
750
751 if (*current != '\\' || *(current + 1) != 'u')
752 {
753 return addError(
754 "expecting another \\u token to begin the second half of a unicode surrogate pair",
755 token,
756 current);
757 }
758
759 current += 2; // skip two characters checked above
760
761 if (!decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
762 return false;
763
764 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
765 }
766
767 return true;
768}
769
770bool
772 Token& token,
773 Location& current,
774 Location end,
775 unsigned int& unicode)
776{
777 if (end - current < 4)
778 {
779 return addError(
780 "Bad unicode escape sequence in string: four digits expected.", token, current);
781 }
782
783 unicode = 0;
784
785 for (int index = 0; index < 4; ++index)
786 {
787 Char const c = *current++;
788 unicode *= 16;
789
790 if (c >= '0' && c <= '9')
791 {
792 unicode += c - '0';
793 }
794 else if (c >= 'a' && c <= 'f')
795 {
796 unicode += c - 'a' + 10;
797 }
798 else if (c >= 'A' && c <= 'F')
799 {
800 unicode += c - 'A' + 10;
801 }
802 else
803 {
804 return addError(
805 "Bad unicode escape sequence in string: hexadecimal digit "
806 "expected.",
807 token,
808 current);
809 }
810 }
811
812 return true;
813}
814
815bool
816Reader::addError(std::string const& message, Token& token, Location extra)
817{
818 ErrorInfo info;
819 info.token = token;
820 info.message = message;
821 info.extra = extra;
822 errors_.push_back(info);
823 return false;
824}
825
826bool
828{
829 int const errorCount = int(errors_.size());
830 Token skip{};
831
832 while (true)
833 {
834 if (!readToken(skip))
835 errors_.resize(errorCount); // discard errors caused by recovery
836
837 if (skip.type == skipUntilToken || skip.type == TokenType::EndOfStream)
838 break;
839 }
840
841 errors_.resize(errorCount);
842 return false;
843}
844
845bool
846Reader::addErrorAndRecover(std::string const& message, Token& token, TokenType skipUntilToken)
847{
848 addError(message, token);
849 return recoverFromError(skipUntilToken);
850}
851
852Value&
854{
855 return *(nodes_.top());
856}
857
860{
861 if (current_ == end_)
862 return 0;
863
864 return *current_++;
865}
866
867void
868Reader::getLocationLineAndColumn(Location location, int& line, int& column) const
869{
870 Location current = begin_;
871 Location lastLineStart = current;
872 line = 0;
873
874 while (current < location && current != end_)
875 {
876 Char const c = *current++;
877
878 if (c == '\r')
879 {
880 if (*current == '\n')
881 ++current;
882
883 lastLineStart = current;
884 ++line;
885 }
886 else if (c == '\n')
887 {
888 lastLineStart = current;
889 ++line;
890 }
891 }
892
893 // column & line start at 1
894 column = int(location - lastLineStart) + 1;
895 ++line;
896}
897
900{
901 int line = 0, column = 0;
902 getLocationLineAndColumn(location, line, column);
903 return "Line " + std::to_string(line) + ", Column " + std::to_string(column);
904}
905
908{
909 std::string formattedMessage;
910
911 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError)
912 {
913 ErrorInfo const& error = *itError;
914 formattedMessage += "* " + getLocationLineAndColumn(error.token.start) + "\n";
915 formattedMessage += " " + error.message + "\n";
916
917 if (error.extra != nullptr)
918 formattedMessage += "See " + getLocationLineAndColumn(error.extra) + " for detail.\n";
919 }
920
921 return formattedMessage;
922}
923
926{
927 json::Reader reader;
928 bool const ok = reader.parse(sin, root);
929
930 // XRPL_ASSERT(ok, "json::operator>>() : parse succeeded");
931 if (!ok)
933
934 return sin;
935}
936
937} // namespace json
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition json_reader.h:17
bool addError(std::string const &message, Token &token, Location extra=0)
Location current_
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
bool recoverFromError(TokenType skipUntilToken)
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
bool readObject(Token &token, unsigned depth)
std::string getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
void skipCommentTokens(Token &token)
bool decodeDouble(Token &token)
Location end_
static constexpr unsigned kNestLimit
Definition json_reader.h:70
bool readArray(Token &token, unsigned depth)
Value * lastValue_
Reader::TokenType readNumber()
void getLocationLineAndColumn(Location location, int &line, int &column) const
Char const * Location
Definition json_reader.h:20
bool decodeNumber(Token &token)
Value & currentValue()
Location begin_
std::string document_
bool readValue(unsigned depth)
bool readCStyleComment()
bool readCppStyleComment()
bool expectToken(TokenType type, Token &token, char const *message)
Location lastValueEnd_
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool match(Location pattern, int patternLength)
bool readToken(Token &token)
bool decodeString(Token &token)
Represents a JSON value.
Definition json_value.h:130
static constexpr Int kMaxInt
Definition json_value.h:143
json::Int Int
Definition json_value.h:138
json::UInt UInt
Definition json_value.h:137
static constexpr UInt kMaxUInt
Definition json_value.h:144
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
JSON (JavaScript Object Notation).
Definition json_errors.h:5
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
static std::string codePointToUTF8(unsigned int cp)
@ Array
array value (ordered list)
Definition json_value.h:25
@ Object
object value (collection of name/value pairs).
Definition json_value.h:26
XRPL_NO_SANITIZE_ADDRESS void Throw(Args &&... args)
Definition contract.h:49
T reserve(T... args)
T resize(T... args)
LedgerEntryType type
Definition Keylet.h:21
T to_string(T... args)