// Shorthand aliases for the types supplied by the two template parameters.
// The three number types and the string type are taken from BasicJsonType.
114 using number_integer_t =
typename BasicJsonType::number_integer_t;
115 using number_unsigned_t =
typename BasicJsonType::number_unsigned_t;
116 using number_float_t =
typename BasicJsonType::number_float_t;
117 using string_t =
typename BasicJsonType::string_t;
// The character type is taken from the input adapter, not from BasicJsonType.
118 using char_type =
typename InputAdapterType::char_type;
// Constructs a lexer from an input adapter.
// @param adapter          input adapter; moved into the member `ia`
// @param ignore_comments_ copied into `ignore_comments`; defaults to false
// The constructor body is not part of this excerpt.
124 explicit lexer(InputAdapterType&& adapter,
bool ignore_comments_ =
false) noexcept
125 : ia(std::move(adapter))
126 , ignore_comments(ignore_comments_)
// The decimal point reported by get_decimal_point() is captured once, at construction.
127 , decimal_point_char(
static_cast<char_int_type
>(get_decimal_point()))
// Copying is disabled (copy constructor and copy assignment are deleted);
// moving uses the compiler-generated implementations.
131 lexer(
const lexer&) =
delete;
132 lexer(lexer&&) =
default;
// NOTE(review): the deleted copy assignment takes a non-const `lexer&`, whereas the
// deleted copy constructor takes `const lexer&` - confirm whether that is intentional.
133 lexer& operator=(lexer&) =
delete;
134 lexer& operator=(lexer&&) =
default;
// Returns the decimal point character of the current C locale.
// @return the first character of localeconv()->decimal_point, or '.' when that
//         pointer is null
144 static char get_decimal_point()
noexcept
146 const auto* loc = localeconv();
// The lconv pointer itself is expected to be valid.
147 JSON_ASSERT(loc !=
nullptr);
// Only the first character of the decimal_point string is used.
148 return (loc->decimal_point ==
nullptr) ?
'.' : *(loc->decimal_point);
// NOTE(review): the enclosing signature is not visible in this excerpt; these lines
// are presumably the body of get_codepoint(), which scan_string() calls and whose
// result it compares against -1 - confirm against the full file.
// Precondition: the current character is the 'u' of a "\u" escape.
173 JSON_ASSERT(current ==
'u');
// Bit offsets of the four hex digits, most significant first (one nibble each).
176 const auto factors = { 12u, 8u, 4u, 0u };
177 for (
const auto factor : factors)
// Decimal digit: subtracting 0x30 ('0') gives the values 0..9.
181 if (current >=
'0' && current <=
'9')
183 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x30u) << factor);
// Upper-case hex digit: subtracting 0x37 ('A' - 10) gives the values 10..15.
185 else if (current >=
'A' && current <=
'F')
187 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x37u) << factor);
// Lower-case hex digit: subtracting 0x57 ('a' - 10) gives the values 10..15.
189 else if (current >=
'a' && current <=
'f')
191 codepoint +=
static_cast<int>((
static_cast<unsigned int>(current) - 0x57u) << factor);
// Four hex digits can only produce a value in the 16-bit range.
199 JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
// Checks bytes against a flat list of inclusive [low, high] bounds.
// @param ranges 2, 4 or 6 values, read as one, two or three (low, high) pairs
// On a mismatch error_message is set to the "ill-formed UTF-8 byte" text.
// NOTE(review): the statements that fetch each byte and the return statements are
// not visible in this excerpt.
218 bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
220 JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
223 for (
auto range = ranges.begin(); range != ranges.end(); ++range)
// The `++range` inside the condition steps to the pair's upper bound once the
// lower-bound test has passed; together with the loop increment this consumes a pair.
226 if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range)))
232 error_message =
"invalid string: ill-formed UTF-8 byte";
// Scans a JSON string token. On entry the current character must be the opening
// quote. Returns token_type::value_string on success, or token_type::parse_error
// with error_message set on malformed input.
// NOTE(review): only fragments of the body are visible in this excerpt (the loop,
// the switch and its case labels are missing); the comments describe what the
// visible lines do and hedge where the controlling condition cannot be seen.
255 token_type scan_string()
261 JSON_ASSERT(current ==
'\"');
// Reported when the closing quote is missing.
271 error_message =
"invalid string: missing closing quote";
272 return token_type::parse_error;
// Success exit.
278 return token_type::value_string;
// ---- \uXXXX escape handling ----
// get_codepoint() signals malformed hex digits with -1.
322 const int codepoint1 = get_codepoint();
323 int codepoint = codepoint1;
325 if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
327 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
328 return token_type::parse_error;
// A high surrogate (U+D800..U+DBFF) must be followed by a second "\u" escape.
332 if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
335 if (JSON_HEDLEY_LIKELY(get() ==
'\\' && get() ==
'u'))
337 const int codepoint2 = get_codepoint();
339 if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
341 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
342 return token_type::parse_error;
// The second escape has to be a low surrogate (U+DC00..U+DFFF).
346 if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
// Combine both surrogates into one code point; the remaining terms of this
// expression are not visible in this excerpt.
349 codepoint =
static_cast<int>(
351 (
static_cast<unsigned int>(codepoint1) << 10u)
353 +
static_cast<unsigned int>(codepoint2)
// The same message is used twice; presumably once for a second escape that is not
// a low surrogate and once for a missing second escape - confirm in the full file.
361 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
362 return token_type::parse_error;
367 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
368 return token_type::parse_error;
// A low surrogate that was not preceded by a high surrogate is rejected.
373 if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
375 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
376 return token_type::parse_error;
// At this point the code point must lie in the Unicode range.
381 JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
// ---- Append the code point to the token buffer as UTF-8 ----
// Below 0x80: a single byte.
384 if (codepoint < 0x80)
387 add(
static_cast<char_int_type
>(codepoint));
// Up to 0x7FF: two bytes (0xC0 lead byte, one 0x80 continuation byte).
389 else if (codepoint <= 0x7FF)
392 add(
static_cast<char_int_type
>(0xC0u | (
static_cast<unsigned int>(codepoint) >> 6u)));
393 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
// Up to 0xFFFF: three bytes (0xE0 lead byte, two continuation bytes).
395 else if (codepoint <= 0xFFFF)
398 add(
static_cast<char_int_type
>(0xE0u | (
static_cast<unsigned int>(codepoint) >> 12u)));
399 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
400 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
// Four bytes (0xF0 lead byte, three continuation bytes); presumably the final
// `else` of the chain above, whose keyword line is not visible here.
405 add(
static_cast<char_int_type
>(0xF0u | (
static_cast<unsigned int>(codepoint) >> 18u)));
406 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
407 add(
static_cast<char_int_type
>(0x80u | ((
static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
408 add(
static_cast<char_int_type
>(0x80u | (
static_cast<unsigned int>(codepoint) & 0x3Fu)));
// Reported for a backslash followed by a character that is not an allowed escape.
416 error_message =
"invalid string: forbidden character after backslash";
417 return token_type::parse_error;
// ---- Unescaped control characters ----
// One dedicated message per character U+0000..U+001F, each naming the escape
// sequence(s) that must be used instead. The case labels selecting these
// branches are not visible in this excerpt.
426 error_message =
"invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
427 return token_type::parse_error;
432 error_message =
"invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
433 return token_type::parse_error;
438 error_message =
"invalid string: control character U+0002 (STX) must be escaped to \\u0002";
439 return token_type::parse_error;
444 error_message =
"invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
445 return token_type::parse_error;
450 error_message =
"invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
451 return token_type::parse_error;
456 error_message =
"invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
457 return token_type::parse_error;
462 error_message =
"invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
463 return token_type::parse_error;
468 error_message =
"invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
469 return token_type::parse_error;
474 error_message =
"invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
475 return token_type::parse_error;
480 error_message =
"invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
481 return token_type::parse_error;
486 error_message =
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
487 return token_type::parse_error;
492 error_message =
"invalid string: control character U+000B (VT) must be escaped to \\u000B";
493 return token_type::parse_error;
498 error_message =
"invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
499 return token_type::parse_error;
504 error_message =
"invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
505 return token_type::parse_error;
510 error_message =
"invalid string: control character U+000E (SO) must be escaped to \\u000E";
511 return token_type::parse_error;
516 error_message =
"invalid string: control character U+000F (SI) must be escaped to \\u000F";
517 return token_type::parse_error;
522 error_message =
"invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
523 return token_type::parse_error;
528 error_message =
"invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
529 return token_type::parse_error;
534 error_message =
"invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
535 return token_type::parse_error;
540 error_message =
"invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
541 return token_type::parse_error;
546 error_message =
"invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
547 return token_type::parse_error;
552 error_message =
"invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
553 return token_type::parse_error;
558 error_message =
"invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
559 return token_type::parse_error;
564 error_message =
"invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
565 return token_type::parse_error;
570 error_message =
"invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
571 return token_type::parse_error;
576 error_message =
"invalid string: control character U+0019 (EM) must be escaped to \\u0019";
577 return token_type::parse_error;
582 error_message =
"invalid string: control character U+001A (SUB) must be escaped to \\u001A";
583 return token_type::parse_error;
588 error_message =
"invalid string: control character U+001B (ESC) must be escaped to \\u001B";
589 return token_type::parse_error;
594 error_message =
"invalid string: control character U+001C (FS) must be escaped to \\u001C";
595 return token_type::parse_error;
600 error_message =
"invalid string: control character U+001D (GS) must be escaped to \\u001D";
601 return token_type::parse_error;
606 error_message =
"invalid string: control character U+001E (RS) must be escaped to \\u001E";
607 return token_type::parse_error;
612 error_message =
"invalid string: control character U+001F (US) must be escaped to \\u001F";
613 return token_type::parse_error;
// ---- Multi-byte UTF-8 validation ----
// Each check passes the inclusive (low, high) bounds that the following byte(s)
// must satisfy: one pair per expected byte. next_byte_in_range() sets
// error_message itself, so these branches only return. The lead-byte case labels
// are not visible in this excerpt.
748 if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
750 return token_type::parse_error;
758 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
760 return token_type::parse_error;
782 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
784 return token_type::parse_error;
792 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
794 return token_type::parse_error;
802 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
804 return token_type::parse_error;
814 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
816 return token_type::parse_error;
824 if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
826 return token_type::parse_error;
// Reported for a byte that matches none of the earlier branches (presumably the
// switch default - confirm in the full file).
834 error_message =
"invalid string: ill-formed UTF-8 byte";
835 return token_type::parse_error;
// NOTE(review): the enclosing signature is not visible in this excerpt; these are
// presumably the two failure paths of scan_comment(), which is called elsewhere in
// this file when ignore_comments is set - confirm against the full file.
// A "/*" comment that is never closed.
878 error_message =
"invalid comment; missing closing '*/'";
// A '/' that is followed by neither '/' nor '*'.
906 error_message =
"invalid comment; expecting '/' or '*' after '/'";
// Overload for `float` targets: parses with std::strtof.
// @param f      receives the parsed value
// @param str    text to parse
// @param endptr forwarded unchanged to std::strtof
// NOTE(review): JSON_HEDLEY_NON_NULL(2) presumably marks `str` as non-null - confirm.
912 JSON_HEDLEY_NON_NULL(2)
913 static void strtof(
float& f,
const char* str,
char** endptr)
noexcept
915 f = std::strtof(str, endptr);
// Overload for `double` targets: parses with std::strtod.
// @param f      receives the parsed value
// @param str    text to parse
// @param endptr forwarded unchanged to std::strtod
// NOTE(review): JSON_HEDLEY_NON_NULL(2) presumably marks `str` as non-null - confirm.
918 JSON_HEDLEY_NON_NULL(2)
919 static void strtof(
double& f,
const char* str,
char** endptr)
noexcept
921 f = std::strtod(str, endptr);
// Overload for `long double` targets: parses with std::strtold.
// @param f      receives the parsed value
// @param str    text to parse
// @param endptr forwarded unchanged to std::strtold
// NOTE(review): JSON_HEDLEY_NON_NULL(2) presumably marks `str` as non-null - confirm.
924 JSON_HEDLEY_NON_NULL(2)
925 static void strtof(
long double& f,
const char* str,
char** endptr)
noexcept
927 f = std::strtold(str, endptr);
// Scans a JSON number with a goto-driven state machine, then converts the
// collected text. Returns value_unsigned, value_integer or value_float on
// success and parse_error (with error_message set) on malformed input.
// NOTE(review): the switch statements that pick each transition and several state
// labels are not visible in this excerpt; only the transitions themselves are.
970 token_type scan_number()
// Start by assuming an unsigned integer; later states change number_type.
977 token_type number_type = token_type::value_unsigned;
985 goto scan_number_minus;
991 goto scan_number_zero;
1005 goto scan_number_any1;
// Switch to the signed integer type. This sits just before the "expected digit
// after '-'" error, so it presumably belongs to the minus state.
1015 number_type = token_type::value_integer;
1021 goto scan_number_zero;
1035 goto scan_number_any1;
1040 error_message =
"invalid number; expected digit after '-'";
1041 return token_type::parse_error;
// The locale's decimal point is stored instead of '.', and its index in
// token_buffer is remembered.
1051 add(decimal_point_char);
1052 decimal_point_position = token_buffer.size() - 1;
1053 goto scan_number_decimal1;
1060 goto scan_number_exponent;
1064 goto scan_number_done;
1083 goto scan_number_any1;
1088 add(decimal_point_char);
1089 decimal_point_position = token_buffer.size() - 1;
1090 goto scan_number_decimal1;
1097 goto scan_number_exponent;
1101 goto scan_number_done;
// State: just read the decimal point - the number is now a float.
1104scan_number_decimal1:
1106 number_type = token_type::value_float;
1121 goto scan_number_decimal2;
1126 error_message =
"invalid number; expected digit after '.'";
1127 return token_type::parse_error;
// State: reading further digits of the fractional part.
1131scan_number_decimal2:
1147 goto scan_number_decimal2;
1154 goto scan_number_exponent;
1158 goto scan_number_done;
// State: just read the exponent marker - the number is now a float.
1161scan_number_exponent:
1163 number_type = token_type::value_float;
1170 goto scan_number_sign;
1185 goto scan_number_any2;
1191 "invalid number; expected '+', '-', or digit after exponent";
1192 return token_type::parse_error;
1212 goto scan_number_any2;
1217 error_message =
"invalid number; expected digit after exponent sign";
1218 return token_type::parse_error;
1238 goto scan_number_any2;
1242 goto scan_number_done;
// ---- Conversion of the scanned text ----
// endptr receives the position where the C conversion function stopped.
1250 char* endptr =
nullptr;
// Unsigned integers: convert with strtoull.
1254 if (number_type == token_type::value_unsigned)
1256 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
// The whole buffer must have been consumed by the conversion.
1259 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
// Accept the value only if strtoull did not report a range error ...
1261 if (errno != ERANGE)
1263 value_unsigned =
static_cast<number_unsigned_t
>(x);
// ... and the cast to number_unsigned_t did not change it.
1264 if (value_unsigned == x)
1266 return token_type::value_unsigned;
// Signed integers: same procedure with strtoll and number_integer_t.
1270 else if (number_type == token_type::value_integer)
1272 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1275 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1277 if (errno != ERANGE)
1279 value_integer =
static_cast<number_integer_t
>(x);
1280 if (value_integer == x)
1282 return token_type::value_integer;
// Floating-point conversion through the strtof overload matching number_float_t.
// It appears to be reached for floats and also for integers that did not return
// above - confirm against the full file.
1289 strtof(value_float, token_buffer.data(), &endptr);
1292 JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1294 return token_type::value_float;
// Matches the input against a fixed literal.
// @param literal_text characters of the expected literal
// @param length       number of characters in literal_text
// @param return_type  token type associated with this literal (the return statement
//                     that uses it is not visible in this excerpt)
// On a mismatch error_message is set to "invalid literal" and parse_error is returned.
1302 JSON_HEDLEY_NON_NULL(2)
1303 token_type scan_literal(
const char_type* literal_text,
const std::size_t length,
1304 token_type return_type)
// The loop starts at index 1; presumably the first character was already matched
// by the caller - confirm against the full file.
1307 for (std::size_t i = 1; i < length; ++i)
1311 error_message =
"invalid literal";
1312 return token_type::parse_error;
// Clears the per-token state: both buffers are emptied and the remembered
// decimal point position is set back to "none" (std::string::npos).
// NOTE(review): any further statements of this function are not visible in this excerpt.
1323 void reset()
noexcept
1325 token_buffer.clear();
1326 token_string.clear();
1327 decimal_point_position = std::string::npos;
// NOTE(review): the enclosing signature is not visible in this excerpt; these lines
// are presumably part of get(), the character-fetch routine called by the scanning
// code - confirm against the full file.
// Count the character both in the total and in the current line.
1343 ++position.chars_read_total;
1344 ++position.chars_read_current_line;
// Fetch the next character from the input adapter.
1353 current = ia.get_character();
// A newline starts a new line: bump the line counter and restart the column count.
1361 if (current ==
'\n')
1363 ++position.lines_read;
1364 position.chars_read_current_line = 0;
// NOTE(review): the enclosing signature is not visible in this excerpt; these lines
// undo the position bookkeeping of one read character and are presumably part of an
// "unget" routine - confirm against the full file.
1382 --position.chars_read_total;
// At column 0 the line counter is stepped back instead (never below zero) ...
1385 if (position.chars_read_current_line == 0)
1387 if (position.lines_read > 0)
1389 --position.lines_read;
// ... otherwise the column counter is stepped back (presumably the else branch,
// whose keyword line is not visible here).
1394 --position.chars_read_current_line;
// Drop the last recorded raw character; there must be one to drop.
1399 JSON_ASSERT(!token_string.empty());
1400 token_string.pop_back();
// Appends one character to token_buffer.
// @param c character to append; converted to the string type's value_type
1405 void add(char_int_type c)
1407 token_buffer.push_back(
static_cast<typename string_t::value_type
>(c));
// NOTE(review): the enclosing signatures are not visible in this excerpt; these are
// presumably the return statements of two separate accessors, one per stored
// integer value - confirm against the full file.
// Returns the stored signed integer value.
1418 return value_integer;
// Returns the stored unsigned integer value.
1424 return value_unsigned;
// NOTE(review): the enclosing signature is not visible in this excerpt; presumably
// the accessor that hands out token_buffer - confirm against the full file.
// If the locale's decimal point differs from '.' and one was recorded for the
// current token, put a '.' back at that position before returning the buffer.
1437 if (decimal_point_char !=
'.' && decimal_point_position != std::string::npos)
1439 token_buffer[decimal_point_position] =
'.';
1441 return token_buffer;
// NOTE(review): the enclosing signature and the declaration of `result` are not
// visible in this excerpt. The loop builds a printable copy of token_string.
1461 for (
const auto c : token_string)
// Characters up to 0x1F are rendered as "<U+XXXX>" instead of being copied raw.
1463 if (
static_cast<unsigned char>(c) <=
'\x1F')
// 9 bytes: the 8 characters of "<U+XXXX>" plus the terminating NUL.
1466 std::array<char, 9> cs{{}};
1467 static_cast<void>((std::snprintf)(cs.data(), cs.size(),
"<U+%.4X>",
static_cast<unsigned char>(c)));
1468 result += cs.data();
// Every other character is appended unchanged.
1473 result.push_back(
static_cast<std::string::value_type
>(c));
// NOTE(review): the enclosing signature is not visible in this excerpt; presumably
// the accessor for the last error text - confirm against the full file.
// Returns the stored error_message pointer; the annotation below presumably
// declares that the result is never null.
1481 JSON_HEDLEY_RETURNS_NON_NULL
1484 return error_message;
// NOTE(review): the enclosing signature is not visible in this excerpt; presumably
// part of skip_bom(), which is called before the first token is scanned.
// True only if the next two characters read are 0xBB followed by 0xBF.
1500 return get() == 0xBB && get() == 0xBF;
// Skips whitespace: the loop continues while the current character is a space,
// tab, line feed or carriage return.
// NOTE(review): the loop body is not visible in this excerpt; the trailing ';'
// suggests this is the condition of a do/while loop - confirm.
1509 void skip_whitespace()
1515 while (current ==
' ' || current ==
'\t' || current ==
'\n' || current ==
'\r');
// NOTE(review): the enclosing signature, the switch and its case labels are not
// visible in this excerpt; presumably the main token dispatch routine - confirm
// against the full file.
// The BOM check only happens before anything has been read.
1521 if (position.chars_read_total == 0 && !
skip_bom())
1523 error_message =
"invalid BOM; must be 0xEF 0xBB 0xBF if given";
1524 return token_type::parse_error;
// With comment skipping enabled, consume comments for as long as the current
// character is '/'; a malformed comment aborts with parse_error.
1531 while (ignore_comments && current ==
'/')
1533 if (!scan_comment())
1535 return token_type::parse_error;
// Structural tokens.
1546 return token_type::begin_array;
1548 return token_type::end_array;
1550 return token_type::begin_object;
1552 return token_type::end_object;
1554 return token_type::name_separator;
1556 return token_type::value_separator;
// Literal "true": spelled out as char_type elements and matched by scan_literal().
1561 std::array<char_type, 4> true_literal = {{
static_cast<char_type
>(
't'),
static_cast<char_type
>(
'r'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'e')}};
1562 return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
// Literal "false".
1566 std::array<char_type, 5> false_literal = {{
static_cast<char_type
>(
'f'),
static_cast<char_type
>(
'a'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
's'),
static_cast<char_type
>(
'e')}};
1567 return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
// Literal "null".
1571 std::array<char_type, 4> null_literal = {{
static_cast<char_type
>(
'n'),
static_cast<char_type
>(
'u'),
static_cast<char_type
>(
'l'),
static_cast<char_type
>(
'l')}};
1572 return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
// Strings and numbers are handled by their own scanners.
1577 return scan_string();
1591 return scan_number();
// End-of-input marker of the character traits; the statement that follows this
// label is not visible in this excerpt.
1596 case char_traits<char_type>::eof():
// Reported when no other branch matched (presumably the switch default - confirm).
1601 error_message =
"invalid literal";
1602 return token_type::parse_error;
// ---- Data members ----
// Input adapter that characters are read from (via ia.get_character()).
1608 InputAdapterType ia;
// Whether comments are skipped while scanning; fixed at construction.
1611 const bool ignore_comments =
false;
// Most recently read character; starts out as the end-of-input marker.
1614 char_int_type current = char_traits<char_type>::eof();
// NOTE(review): no use of this flag is visible in this excerpt; presumably it
// tells the next read to reuse `current` - confirm against the full file.
1617 bool next_unget =
false;
// Read-position counters (chars_read_total, chars_read_current_line, lines_read).
1620 position_t position {};
// Raw characters recorded for the current token; used to build the printable
// token text.
1623 std::vector<char_type> token_string {};
// Processed text of the current token; filled through add().
1626 string_t token_buffer {};
// Description of the most recent error; empty string when none has been set.
1629 const char* error_message =
"";
// Results of the last number conversion, one per number kind.
1632 number_integer_t value_integer = 0;
1633 number_unsigned_t value_unsigned = 0;
1634 number_float_t value_float = 0;
// Decimal point character; '.' unless overridden in the constructor's initializer list.
1637 const char_int_type decimal_point_char =
'.';
// Index in token_buffer where a decimal point was stored, or npos if there is none.
1639 std::size_t decimal_point_position = std::string::npos;