summaryrefslogtreecommitdiffhomepage
path: root/vendor/toml++/impl/parser.inl
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/toml++/impl/parser.inl')
-rw-r--r--vendor/toml++/impl/parser.inl6938
1 files changed, 3267 insertions, 3671 deletions
diff --git a/vendor/toml++/impl/parser.inl b/vendor/toml++/impl/parser.inl
index 6f0136f..1530e5a 100644
--- a/vendor/toml++/impl/parser.inl
+++ b/vendor/toml++/impl/parser.inl
@@ -1,29 +1,29 @@
-//# This file is a part of toml++ and is subject to the the terms of the MIT license.
-//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
-//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
-// SPDX-License-Identifier: MIT
+// # This file is a part of toml++ and is subject to the the terms of the MIT license.
+// # Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
+// # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
+// SPDX-License-Identifier: MIT
#pragma once
#include "preprocessor.hpp"
-//# {{
+// # {{
#if !TOML_IMPLEMENTATION
#error This is an implementation-only header.
#endif
-//# }}
+// # }}
#if TOML_ENABLE_PARSER
+#include "array.hpp"
+#include "date_time.hpp"
+#include "parse_error.hpp"
#include "parser.hpp"
-#include "std_optional.hpp"
#include "source_region.hpp"
-#include "parse_error.hpp"
-#include "date_time.hpp"
-#include "value.hpp"
-#include "array.hpp"
+#include "std_optional.hpp"
#include "table.hpp"
#include "unicode.hpp"
+#include "value.hpp"
TOML_DISABLE_WARNINGS;
-#include <istream>
#include <fstream>
+#include <istream>
#if TOML_INT_CHARCONV || TOML_FLOAT_CHARCONV
#include <charconv>
#endif
@@ -36,190 +36,153 @@ TOML_DISABLE_WARNINGS;
TOML_ENABLE_WARNINGS;
#include "header_start.hpp"
-//#---------------------------------------------------------------------------------------------------------------------
-//# UTF8 STREAMS
-//#---------------------------------------------------------------------------------------------------------------------
-
-TOML_ANON_NAMESPACE_START
-{
- template <typename T>
- class utf8_byte_stream;
-
- TOML_INTERNAL_LINKAGE
- constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv;
-
- template <typename Char>
- class utf8_byte_stream<std::basic_string_view<Char>>
- {
- static_assert(sizeof(Char) == 1);
-
- private:
- std::basic_string_view<Char> source_;
- size_t position_ = {};
-
- public:
- TOML_NODISCARD_CTOR
- explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept //
- : source_{ sv }
- {
- // skip bom
- if (source_.length() >= 3u && memcmp(utf8_byte_order_mark.data(), source_.data(), 3u) == 0)
- position_ += 3u;
- }
-
- TOML_CONST_INLINE_GETTER
- constexpr bool error() const noexcept
- {
- return false;
- }
-
- TOML_PURE_INLINE_GETTER
- constexpr bool eof() const noexcept
- {
- return position_ >= source_.length();
- }
-
- TOML_PURE_INLINE_GETTER
- explicit constexpr operator bool() const noexcept
- {
- return !eof();
- }
-
- TOML_PURE_INLINE_GETTER
- constexpr bool peek_eof() const noexcept
- {
- return eof();
- }
-
- TOML_NODISCARD
- TOML_ATTR(nonnull)
- size_t operator()(void* dest, size_t num) noexcept
- {
- TOML_ASSERT_ASSUME(!eof());
-
- num = impl::min(position_ + num, source_.length()) - position_;
- std::memcpy(dest, source_.data() + position_, num);
- position_ += num;
- return num;
- }
- };
-
- template <>
- class utf8_byte_stream<std::istream>
- {
- private:
- std::istream* source_;
-
- public:
- TOML_NODISCARD_CTOR
- explicit utf8_byte_stream(std::istream& stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) //
- : source_{ &stream }
- {
- if (!*this) // eof, bad
- return;
-
- const auto initial_pos = source_->tellg();
- char bom[3];
- source_->read(bom, 3);
- if (source_->bad() || (source_->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3u) == 0))
- return;
-
- source_->clear();
- source_->seekg(initial_pos, std::istream::beg);
- }
-
- TOML_PURE_INLINE_GETTER
- bool error() const noexcept
- {
- return !!(source_->rdstate() & std::istream::badbit);
- }
-
- TOML_PURE_INLINE_GETTER
- bool eof() const noexcept
- {
- return !!(source_->rdstate() & std::istream::eofbit);
- }
-
- TOML_PURE_INLINE_GETTER
- explicit operator bool() const noexcept
- {
- return !(source_->rdstate() & (std::istream::badbit | std::istream::eofbit));
- }
-
- TOML_NODISCARD
- bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- return eof() || source_->peek() == std::istream::traits_type::eof();
- }
-
- TOML_NODISCARD
- TOML_ATTR(nonnull)
- size_t operator()(void* dest, size_t num) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- TOML_ASSERT(*this);
-
- source_->read(static_cast<char*>(dest), static_cast<std::streamsize>(num));
- return static_cast<size_t>(source_->gcount());
- }
- };
-
- struct utf8_codepoint
- {
- char32_t value;
- char bytes[4];
- size_t count;
- source_position position;
-
- TOML_PURE_INLINE_GETTER
- constexpr operator const char32_t&() const noexcept
- {
- return value;
- }
-
- TOML_PURE_INLINE_GETTER
- constexpr const char32_t& operator*() const noexcept
- {
- return value;
- }
- };
- static_assert(std::is_trivial_v<utf8_codepoint>);
- static_assert(std::is_standard_layout_v<utf8_codepoint>);
-
- struct TOML_ABSTRACT_INTERFACE utf8_reader_interface
- {
- TOML_NODISCARD
- virtual const source_path_ptr& source_path() const noexcept = 0;
-
- TOML_NODISCARD
- virtual const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
-
- TOML_NODISCARD
- virtual bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
+// #---------------------------------------------------------------------------------------------------------------------
+// # UTF8 STREAMS
+// #---------------------------------------------------------------------------------------------------------------------
+
+TOML_ANON_NAMESPACE_START {
+ template <typename T>
+ class utf8_byte_stream;
+
+ TOML_INTERNAL_LINKAGE
+ constexpr auto utf8_byte_order_mark = "\xEF\xBB\xBF"sv;
+
+ template <typename Char>
+ class utf8_byte_stream<std::basic_string_view<Char>> {
+ static_assert(sizeof(Char) == 1);
+
+ private:
+ std::basic_string_view<Char> source_;
+ size_t position_ = {};
+
+ public:
+ TOML_NODISCARD_CTOR
+ explicit constexpr utf8_byte_stream(std::basic_string_view<Char> sv) noexcept //
+ : source_{sv} {
+ // skip bom
+ if (source_.length() >= 3u && memcmp(utf8_byte_order_mark.data(), source_.data(), 3u) == 0)
+ position_ += 3u;
+ }
+
+ TOML_CONST_INLINE_GETTER
+ constexpr bool error() const noexcept { return false; }
+
+ TOML_PURE_INLINE_GETTER
+ constexpr bool eof() const noexcept { return position_ >= source_.length(); }
+
+ TOML_PURE_INLINE_GETTER
+ explicit constexpr operator bool() const noexcept { return !eof(); }
+
+ TOML_PURE_INLINE_GETTER
+ constexpr bool peek_eof() const noexcept { return eof(); }
+
+ TOML_NODISCARD
+ TOML_ATTR(nonnull)
+ size_t operator()(void* dest, size_t num) noexcept {
+ TOML_ASSERT_ASSUME(!eof());
+
+ num = impl::min(position_ + num, source_.length()) - position_;
+ std::memcpy(dest, source_.data() + position_, num);
+ position_ += num;
+ return num;
+ }
+ };
+
+ template <>
+ class utf8_byte_stream<std::istream> {
+ private:
+ std::istream* source_;
+
+ public:
+ TOML_NODISCARD_CTOR
+ explicit utf8_byte_stream(std::istream& stream) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) //
+ : source_{&stream} {
+ if (!*this) // eof, bad
+ return;
+
+ const auto initial_pos = source_->tellg();
+ char bom[3];
+ source_->read(bom, 3);
+ if (source_->bad() ||
+ (source_->gcount() == 3 && memcmp(utf8_byte_order_mark.data(), bom, 3u) == 0))
+ return;
+
+ source_->clear();
+ source_->seekg(initial_pos, std::istream::beg);
+ }
+
+ TOML_PURE_INLINE_GETTER
+ bool error() const noexcept { return !!(source_->rdstate() & std::istream::badbit); }
+
+ TOML_PURE_INLINE_GETTER
+ bool eof() const noexcept { return !!(source_->rdstate() & std::istream::eofbit); }
+
+ TOML_PURE_INLINE_GETTER
+ explicit operator bool() const noexcept {
+ return !(source_->rdstate() & (std::istream::badbit | std::istream::eofbit));
+ }
+
+ TOML_NODISCARD
+ bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
+ return eof() || source_->peek() == std::istream::traits_type::eof();
+ }
+
+ TOML_NODISCARD
+ TOML_ATTR(nonnull)
+ size_t operator()(void* dest, size_t num) noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
+ TOML_ASSERT(*this);
+
+ source_->read(static_cast<char*>(dest), static_cast<std::streamsize>(num));
+ return static_cast<size_t>(source_->gcount());
+ }
+ };
+
+ struct utf8_codepoint {
+ char32_t value;
+ char bytes[4];
+ size_t count;
+ source_position position;
+
+ TOML_PURE_INLINE_GETTER
+ constexpr operator const char32_t&() const noexcept { return value; }
+
+ TOML_PURE_INLINE_GETTER
+ constexpr const char32_t& operator*() const noexcept { return value; }
+ };
+ static_assert(std::is_trivial_v<utf8_codepoint>);
+ static_assert(std::is_standard_layout_v<utf8_codepoint>);
+
+ struct TOML_ABSTRACT_INTERFACE utf8_reader_interface {
+ TOML_NODISCARD
+ virtual const source_path_ptr& source_path() const noexcept = 0;
+
+ TOML_NODISCARD
+ virtual const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
+
+ TOML_NODISCARD
+ virtual bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) = 0;
#if !TOML_EXCEPTIONS
- TOML_NODISCARD
- virtual optional<parse_error>&& error() noexcept = 0;
+ TOML_NODISCARD
+ virtual optional<parse_error>&& error() noexcept = 0;
#endif
- virtual ~utf8_reader_interface() noexcept = default;
- };
+ virtual ~utf8_reader_interface() noexcept = default;
+ };
#if TOML_EXCEPTIONS
-#define utf8_reader_error(...) throw parse_error(__VA_ARGS__)
+#define utf8_reader_error(...) throw parse_error(__VA_ARGS__)
#define utf8_reader_return_after_error(...) static_assert(true)
-#define utf8_reader_error_check(...) static_assert(true)
+#define utf8_reader_error_check(...) static_assert(true)
#else
-#define utf8_reader_error(...) err_.emplace(__VA_ARGS__)
+#define utf8_reader_error(...) err_.emplace(__VA_ARGS__)
#define utf8_reader_return_after_error(...) return __VA_ARGS__
-#define utf8_reader_error_check(...) \
- do \
- { \
- if TOML_UNLIKELY(err_) \
- return __VA_ARGS__; \
- } \
- while (false)
+#define utf8_reader_error_check(...) \
+ do { \
+ if TOML_UNLIKELY (err_) return __VA_ARGS__; \
+ } while (false)
#endif
@@ -229,373 +192,312 @@ TOML_ANON_NAMESPACE_START
#define TOML_OVERALIGNED alignas(32)
#endif
- template <typename T>
- class TOML_EMPTY_BASES utf8_reader final : public utf8_reader_interface
- {
- private:
- static constexpr size_t block_capacity = 32;
- utf8_byte_stream<T> stream_;
- source_position next_pos_ = { 1, 1 };
-
- impl::utf8_decoder decoder_;
- struct currently_decoding_t
- {
- char bytes[4];
- size_t count;
- } currently_decoding_;
-
- struct codepoints_t
- {
- TOML_OVERALIGNED utf8_codepoint buffer[block_capacity];
- size_t current;
- size_t count;
- } codepoints_;
-
- source_path_ptr source_path_;
+ template <typename T>
+ class TOML_EMPTY_BASES utf8_reader final : public utf8_reader_interface {
+ private:
+ static constexpr size_t block_capacity = 32;
+ utf8_byte_stream<T> stream_;
+ source_position next_pos_ = {1, 1};
+
+ impl::utf8_decoder decoder_;
+ struct currently_decoding_t {
+ char bytes[4];
+ size_t count;
+ } currently_decoding_;
+
+ struct codepoints_t {
+ TOML_OVERALIGNED utf8_codepoint buffer[block_capacity];
+ size_t current;
+ size_t count;
+ } codepoints_;
+
+ source_path_ptr source_path_;
#if !TOML_EXCEPTIONS
- optional<parse_error> err_;
+ optional<parse_error> err_;
#endif
- bool read_next_block() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- TOML_ASSERT(stream_);
+ bool read_next_block() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
+ TOML_ASSERT(stream_);
- TOML_OVERALIGNED char raw_bytes[block_capacity];
- size_t raw_bytes_read;
+ TOML_OVERALIGNED char raw_bytes[block_capacity];
+ size_t raw_bytes_read;
- // read the next raw (encoded) block in from the stream
- if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS)
- {
- raw_bytes_read = stream_(raw_bytes, block_capacity);
- }
+ // read the next raw (encoded) block in from the stream
+ if constexpr (noexcept(stream_(raw_bytes, block_capacity)) || !TOML_EXCEPTIONS) {
+ raw_bytes_read = stream_(raw_bytes, block_capacity);
+ }
#if TOML_EXCEPTIONS
- else
- {
- try
- {
- raw_bytes_read = stream_(raw_bytes, block_capacity);
- }
- catch (const std::exception& exc)
- {
- throw parse_error{ exc.what(), next_pos_, source_path_ };
- }
- catch (...)
- {
- throw parse_error{ "An unspecified error occurred", next_pos_, source_path_ };
- }
- }
-#endif // TOML_EXCEPTIONS
-
- // handle a zero-byte read
- if TOML_UNLIKELY(!raw_bytes_read)
- {
- if (stream_.eof())
- {
- // EOF only sets the error state if the decoder wants more input, otherwise
- // a zero-byte read might have just caused the underlying stream to realize it's exhaused and set
- // the EOF flag, and that's totally fine
- if (decoder_.needs_more_input())
- utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
- next_pos_,
- source_path_);
- }
- else
- {
- utf8_reader_error("Reading from the underlying stream failed - zero bytes read",
- next_pos_,
- source_path_);
- }
- return false;
- }
-
- TOML_ASSERT_ASSUME(raw_bytes_read);
- std::memset(&codepoints_, 0, sizeof(codepoints_));
-
- // helper for calculating decoded codepoint line+cols
- const auto calc_positions = [&]() noexcept
- {
- for (size_t i = 0; i < codepoints_.count; i++)
- {
- auto& cp = codepoints_.buffer[i];
- cp.position = next_pos_;
-
- if (cp == U'\n')
- {
- next_pos_.line++;
- next_pos_.column = source_index{ 1 };
- }
- else
- next_pos_.column++;
- }
- };
-
- // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain ASCII
- const auto ascii_fast_path = !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read);
-
- // ASCII fast-path
- if (ascii_fast_path)
- {
- decoder_.reset();
- currently_decoding_.count = {};
-
- codepoints_.count = raw_bytes_read;
- for (size_t i = 0; i < codepoints_.count; i++)
- {
- auto& cp = codepoints_.buffer[i];
- cp.value = static_cast<char32_t>(raw_bytes[i]);
- cp.bytes[0] = raw_bytes[i];
- cp.count = 1u;
- }
- }
-
- // UTF-8 slow-path
- else
- {
- // helper for getting precise error location
- const auto error_pos = [&]() noexcept -> const source_position&
- { //
- return codepoints_.count ? codepoints_.buffer[codepoints_.count - 1u].position : next_pos_;
- };
-
- for (size_t i = 0; i < raw_bytes_read; i++)
- {
- decoder_(static_cast<uint8_t>(raw_bytes[i]));
- if TOML_UNLIKELY(decoder_.error())
- {
- calc_positions();
- utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_);
- utf8_reader_return_after_error(false);
- }
-
- currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i];
-
- if (decoder_.has_code_point())
- {
- auto& cp = codepoints_.buffer[codepoints_.count++];
-
- cp.value = decoder_.codepoint;
- cp.count = currently_decoding_.count;
- std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count);
- currently_decoding_.count = {};
- }
- else if TOML_UNLIKELY(currently_decoding_.count == 4u)
- {
- calc_positions();
- utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_);
- utf8_reader_return_after_error(false);
- }
- }
- if TOML_UNLIKELY(decoder_.needs_more_input() && stream_.eof())
- {
- calc_positions();
- utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
- error_pos(),
- source_path_);
- utf8_reader_return_after_error(false);
- }
- }
-
- TOML_ASSERT_ASSUME(codepoints_.count);
- calc_positions();
-
- // handle general I/O errors
- // (down here so the next_pos_ benefits from calc_positions())
- if TOML_UNLIKELY(stream_.error())
- {
- utf8_reader_error("An I/O error occurred while reading from the underlying stream",
- next_pos_,
- source_path_);
- utf8_reader_return_after_error(false);
- }
-
- return true;
- }
-
- public:
- template <typename U, typename String = std::string_view>
- TOML_NODISCARD_CTOR
- explicit utf8_reader(U&& source, String&& source_path = {}) noexcept(
- std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>)
- : stream_{ static_cast<U&&>(source) }
- {
- currently_decoding_.count = {};
-
- codepoints_.current = {};
- codepoints_.count = {};
-
- if (!source_path.empty())
- source_path_ = std::make_shared<const std::string>(static_cast<String&&>(source_path));
- }
-
- TOML_PURE_INLINE_GETTER
- const source_path_ptr& source_path() const noexcept final
- {
- return source_path_;
- }
-
- TOML_NODISCARD
- const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final
- {
- utf8_reader_error_check({});
-
- if (codepoints_.current == codepoints_.count)
- {
- if TOML_UNLIKELY(!stream_ || !read_next_block())
- return nullptr;
-
- TOML_ASSERT_ASSUME(!codepoints_.current);
- }
- TOML_ASSERT_ASSUME(codepoints_.count);
- TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity);
- TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count);
-
- return &codepoints_.buffer[codepoints_.current++];
- }
-
- TOML_NODISCARD
- bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final
- {
- return stream_.peek_eof();
- }
+ else {
+ try {
+ raw_bytes_read = stream_(raw_bytes, block_capacity);
+ } catch (const std::exception& exc) {
+ throw parse_error{exc.what(), next_pos_, source_path_};
+ } catch (...) {
+ throw parse_error{"An unspecified error occurred", next_pos_, source_path_};
+ }
+ }
+#endif // TOML_EXCEPTIONS
+
+ // handle a zero-byte read
+ if TOML_UNLIKELY (!raw_bytes_read) {
+ if (stream_.eof()) {
+ // EOF only sets the error state if the decoder wants more input, otherwise
+ // a zero-byte read might have just caused the underlying stream to realize it's exhaused
+ // and set the EOF flag, and that's totally fine
+ if (decoder_.needs_more_input())
+ utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
+ next_pos_, source_path_);
+ } else {
+ utf8_reader_error("Reading from the underlying stream failed - zero bytes read",
+ next_pos_, source_path_);
+ }
+ return false;
+ }
+
+ TOML_ASSERT_ASSUME(raw_bytes_read);
+ std::memset(&codepoints_, 0, sizeof(codepoints_));
+
+ // helper for calculating decoded codepoint line+cols
+ const auto calc_positions = [&]() noexcept {
+ for (size_t i = 0; i < codepoints_.count; i++) {
+ auto& cp = codepoints_.buffer[i];
+ cp.position = next_pos_;
+
+ if (cp == U'\n') {
+ next_pos_.line++;
+ next_pos_.column = source_index{1};
+ } else
+ next_pos_.column++;
+ }
+ };
+
+ // decide whether we need to use the UTF-8 decoder or if we can treat this block as plain
+ // ASCII
+ const auto ascii_fast_path =
+ !decoder_.needs_more_input() && impl::is_ascii(raw_bytes, raw_bytes_read);
+
+ // ASCII fast-path
+ if (ascii_fast_path) {
+ decoder_.reset();
+ currently_decoding_.count = {};
+
+ codepoints_.count = raw_bytes_read;
+ for (size_t i = 0; i < codepoints_.count; i++) {
+ auto& cp = codepoints_.buffer[i];
+ cp.value = static_cast<char32_t>(raw_bytes[i]);
+ cp.bytes[0] = raw_bytes[i];
+ cp.count = 1u;
+ }
+ }
+
+ // UTF-8 slow-path
+ else {
+ // helper for getting precise error location
+ const auto error_pos = [&]() noexcept -> const source_position& { //
+ return codepoints_.count ? codepoints_.buffer[codepoints_.count - 1u].position
+ : next_pos_;
+ };
+
+ for (size_t i = 0; i < raw_bytes_read; i++) {
+ decoder_(static_cast<uint8_t>(raw_bytes[i]));
+ if TOML_UNLIKELY (decoder_.error()) {
+ calc_positions();
+ utf8_reader_error("Encountered invalid utf-8 sequence", error_pos(), source_path_);
+ utf8_reader_return_after_error(false);
+ }
+
+ currently_decoding_.bytes[currently_decoding_.count++] = raw_bytes[i];
+
+ if (decoder_.has_code_point()) {
+ auto& cp = codepoints_.buffer[codepoints_.count++];
+
+ cp.value = decoder_.codepoint;
+ cp.count = currently_decoding_.count;
+ std::memcpy(cp.bytes, currently_decoding_.bytes, currently_decoding_.count);
+ currently_decoding_.count = {};
+ } else if TOML_UNLIKELY (currently_decoding_.count == 4u) {
+ calc_positions();
+ utf8_reader_error("Encountered overlong utf-8 sequence", error_pos(), source_path_);
+ utf8_reader_return_after_error(false);
+ }
+ }
+ if TOML_UNLIKELY (decoder_.needs_more_input() && stream_.eof()) {
+ calc_positions();
+ utf8_reader_error("Encountered EOF during incomplete utf-8 code point sequence",
+ error_pos(), source_path_);
+ utf8_reader_return_after_error(false);
+ }
+ }
+
+ TOML_ASSERT_ASSUME(codepoints_.count);
+ calc_positions();
+
+ // handle general I/O errors
+ // (down here so the next_pos_ benefits from calc_positions())
+ if TOML_UNLIKELY (stream_.error()) {
+ utf8_reader_error("An I/O error occurred while reading from the underlying stream",
+ next_pos_, source_path_);
+ utf8_reader_return_after_error(false);
+ }
+
+ return true;
+ }
+
+ public:
+ template <typename U, typename String = std::string_view>
+ TOML_NODISCARD_CTOR explicit utf8_reader(U&& source, String&& source_path = {}) noexcept(
+ std::is_nothrow_constructible_v<utf8_byte_stream<T>, U&&>)
+ : stream_{static_cast<U&&>(source)} {
+ currently_decoding_.count = {};
+
+ codepoints_.current = {};
+ codepoints_.count = {};
+
+ if (!source_path.empty())
+ source_path_ = std::make_shared<const std::string>(static_cast<String&&>(source_path));
+ }
+
+ TOML_PURE_INLINE_GETTER
+ const source_path_ptr& source_path() const noexcept final { return source_path_; }
+
+ TOML_NODISCARD
+ const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final {
+ utf8_reader_error_check({});
+
+ if (codepoints_.current == codepoints_.count) {
+ if TOML_UNLIKELY (!stream_ || !read_next_block()) return nullptr;
+
+ TOML_ASSERT_ASSUME(!codepoints_.current);
+ }
+ TOML_ASSERT_ASSUME(codepoints_.count);
+ TOML_ASSERT_ASSUME(codepoints_.count <= block_capacity);
+ TOML_ASSERT_ASSUME(codepoints_.current < codepoints_.count);
+
+ return &codepoints_.buffer[codepoints_.current++];
+ }
+
+ TOML_NODISCARD
+ bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) final {
+ return stream_.peek_eof();
+ }
#if !TOML_EXCEPTIONS
- TOML_NODISCARD
- optional<parse_error>&& error() noexcept final
- {
- return std::move(err_);
- }
+ TOML_NODISCARD
+ optional<parse_error>&& error() noexcept final { return std::move(err_); }
#endif
- };
-
- template <typename Char>
- utf8_reader(std::basic_string_view<Char>, std::string_view) -> utf8_reader<std::basic_string_view<Char>>;
- template <typename Char>
- utf8_reader(std::basic_string_view<Char>, std::string&&) -> utf8_reader<std::basic_string_view<Char>>;
- template <typename Char>
- utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>;
- template <typename Char>
- utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>;
+ };
+
+ template <typename Char>
+ utf8_reader(std::basic_string_view<Char>,
+ std::string_view) -> utf8_reader<std::basic_string_view<Char>>;
+ template <typename Char>
+ utf8_reader(std::basic_string_view<Char>,
+ std::string&&) -> utf8_reader<std::basic_string_view<Char>>;
+ template <typename Char>
+ utf8_reader(std::basic_istream<Char>&, std::string_view) -> utf8_reader<std::basic_istream<Char>>;
+ template <typename Char>
+ utf8_reader(std::basic_istream<Char>&, std::string&&) -> utf8_reader<std::basic_istream<Char>>;
#if TOML_EXCEPTIONS
#define utf8_buffered_reader_error_check(...) static_assert(true)
#else
-#define utf8_buffered_reader_error_check(...) \
- do \
- { \
- if TOML_UNLIKELY(reader_.error()) \
- return __VA_ARGS__; \
- } \
- while (false)
+#define utf8_buffered_reader_error_check(...) \
+ do { \
+ if TOML_UNLIKELY (reader_.error()) return __VA_ARGS__; \
+ } while (false)
#endif
- class TOML_EMPTY_BASES utf8_buffered_reader
- {
- public:
- static constexpr size_t max_history_length = 128;
-
- private:
- static constexpr size_t history_buffer_size = max_history_length - 1; //'head' is stored in the reader
- utf8_reader_interface& reader_;
- struct
- {
- utf8_codepoint buffer[history_buffer_size];
- size_t count, first;
- } history_ = {};
- const utf8_codepoint* head_ = {};
- size_t negative_offset_ = {};
-
- public:
- TOML_NODISCARD_CTOR
- explicit utf8_buffered_reader(utf8_reader_interface& reader) noexcept //
- : reader_{ reader }
- {}
-
- TOML_PURE_INLINE_GETTER
- const source_path_ptr& source_path() const noexcept
- {
- return reader_.source_path();
- }
-
- TOML_NODISCARD
- const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- utf8_buffered_reader_error_check({});
-
- if (negative_offset_)
- {
- negative_offset_--;
-
- // an entry negative offset of 1 just means "replay the current head"
- if (!negative_offset_)
- return head_;
-
- // otherwise step back into the history buffer
- else
- return history_.buffer
- + ((history_.first + history_.count - negative_offset_) % history_buffer_size);
- }
- else
- {
- // first character read from stream
- if TOML_UNLIKELY(!history_.count && !head_)
- head_ = reader_.read_next();
-
- // subsequent characters and not eof
- else if (head_)
- {
- if TOML_UNLIKELY(history_.count < history_buffer_size)
- history_.buffer[history_.count++] = *head_;
- else
- history_.buffer[(history_.first++ + history_buffer_size) % history_buffer_size] = *head_;
-
- head_ = reader_.read_next();
- }
-
- return head_;
- }
- }
-
- TOML_NODISCARD
- const utf8_codepoint* step_back(size_t count) noexcept
- {
- utf8_buffered_reader_error_check({});
-
- TOML_ASSERT_ASSUME(history_.count);
- TOML_ASSERT_ASSUME(negative_offset_ + count <= history_.count);
-
- negative_offset_ += count;
-
- return negative_offset_
- ? history_.buffer + ((history_.first + history_.count - negative_offset_) % history_buffer_size)
- : head_;
- }
-
- TOML_NODISCARD
- bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- return reader_.peek_eof();
- }
+ class TOML_EMPTY_BASES utf8_buffered_reader {
+ public:
+ static constexpr size_t max_history_length = 128;
+
+ private:
+ static constexpr size_t history_buffer_size =
+ max_history_length - 1; //'head' is stored in the reader
+ utf8_reader_interface& reader_;
+ struct {
+ utf8_codepoint buffer[history_buffer_size];
+ size_t count, first;
+ } history_ = {};
+ const utf8_codepoint* head_ = {};
+ size_t negative_offset_ = {};
+
+ public:
+ TOML_NODISCARD_CTOR
+ explicit utf8_buffered_reader(utf8_reader_interface& reader) noexcept //
+ : reader_{reader} {}
+
+ TOML_PURE_INLINE_GETTER
+ const source_path_ptr& source_path() const noexcept { return reader_.source_path(); }
+
+ TOML_NODISCARD
+ const utf8_codepoint* read_next() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
+ utf8_buffered_reader_error_check({});
+
+ if (negative_offset_) {
+ negative_offset_--;
+
+ // an entry negative offset of 1 just means "replay the current head"
+ if (!negative_offset_) return head_;
+
+ // otherwise step back into the history buffer
+ else
+ return history_.buffer +
+ ((history_.first + history_.count - negative_offset_) % history_buffer_size);
+ } else {
+ // first character read from stream
+ if TOML_UNLIKELY (!history_.count && !head_) head_ = reader_.read_next();
+
+ // subsequent characters and not eof
+ else if (head_) {
+ if TOML_UNLIKELY (history_.count < history_buffer_size)
+ history_.buffer[history_.count++] = *head_;
+ else
+ history_.buffer[(history_.first++ + history_buffer_size) % history_buffer_size] =
+ *head_;
+
+ head_ = reader_.read_next();
+ }
+
+ return head_;
+ }
+ }
+
+ TOML_NODISCARD
+ const utf8_codepoint* step_back(size_t count) noexcept {
+ utf8_buffered_reader_error_check({});
+
+ TOML_ASSERT_ASSUME(history_.count);
+ TOML_ASSERT_ASSUME(negative_offset_ + count <= history_.count);
+
+ negative_offset_ += count;
+
+ return negative_offset_
+ ? history_.buffer +
+ ((history_.first + history_.count - negative_offset_) % history_buffer_size)
+ : head_;
+ }
+
+ TOML_NODISCARD
+ bool peek_eof() const noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) { return reader_.peek_eof(); }
#if !TOML_EXCEPTIONS
- TOML_NODISCARD
- optional<parse_error>&& error() noexcept
- {
- return reader_.error();
- }
+ TOML_NODISCARD
+ optional<parse_error>&& error() noexcept { return reader_.error(); }
#endif
- };
+ };
}
TOML_ANON_NAMESPACE_END;
-//#---------------------------------------------------------------------------------------------------------------------
-//# PARSER INTERNAL IMPLEMENTATION
-//#---------------------------------------------------------------------------------------------------------------------
+// #---------------------------------------------------------------------------------------------------------------------
+// # PARSER INTERNAL IMPLEMENTATION
+// #---------------------------------------------------------------------------------------------------------------------
#if TOML_EXCEPTIONS
#define TOML_RETURNS_BY_THROWING [[noreturn]]
@@ -603,398 +505,331 @@ TOML_ANON_NAMESPACE_END;
#define TOML_RETURNS_BY_THROWING
#endif
-TOML_ANON_NAMESPACE_START
-{
- template <typename... T>
- TOML_CONST_GETTER
- TOML_INTERNAL_LINKAGE
- constexpr bool is_match(char32_t codepoint, T... vals) noexcept
- {
- static_assert((std::is_same_v<char32_t, T> && ...));
- return ((codepoint == vals) || ...);
- }
-
- template <uint64_t>
- struct parse_integer_traits;
- template <>
- struct parse_integer_traits<2>
- {
- static constexpr auto scope_qualifier = "binary integer"sv;
- static constexpr auto is_digit = impl::is_binary_digit;
- static constexpr auto is_signed = false;
- static constexpr auto max_digits = 63;
- static constexpr auto prefix_codepoint = U'b';
- static constexpr auto prefix = "b"sv;
- static constexpr auto full_prefix = "0b"sv;
- };
- template <>
- struct parse_integer_traits<8>
- {
- static constexpr auto scope_qualifier = "octal integer"sv;
- static constexpr auto is_digit = impl::is_octal_digit;
- static constexpr auto is_signed = false;
- static constexpr auto max_digits = 21; // strlen("777777777777777777777")
- static constexpr auto prefix_codepoint = U'o';
- static constexpr auto prefix = "o"sv;
- static constexpr auto full_prefix = "0o"sv;
- };
- template <>
- struct parse_integer_traits<10>
- {
- static constexpr auto scope_qualifier = "decimal integer"sv;
- static constexpr auto is_digit = impl::is_decimal_digit;
- static constexpr auto is_signed = true;
- static constexpr auto max_digits = 19; // strlen("9223372036854775807")
- static constexpr auto full_prefix = ""sv;
- };
- template <>
- struct parse_integer_traits<16>
- {
- static constexpr auto scope_qualifier = "hexadecimal integer"sv;
- static constexpr auto is_digit = impl::is_hexadecimal_digit;
- static constexpr auto is_signed = false;
- static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF")
- static constexpr auto prefix_codepoint = U'x';
- static constexpr auto prefix = "x"sv;
- static constexpr auto full_prefix = "0x"sv;
- };
-
- TOML_PURE_GETTER
- TOML_INTERNAL_LINKAGE
- std::string_view to_sv(node_type val) noexcept
- {
- return impl::node_type_friendly_names[impl::unwrap_enum(val)];
- }
-
- TOML_PURE_GETTER
- TOML_INTERNAL_LINKAGE
- std::string_view to_sv(const std::string& str) noexcept
- {
- return std::string_view{ str };
- }
-
- TOML_CONST_GETTER
- TOML_INTERNAL_LINKAGE
- std::string_view to_sv(bool val) noexcept
- {
- using namespace std::string_view_literals;
-
- return val ? "true"sv : "false"sv;
- }
-
- TOML_PURE_GETTER
- TOML_INTERNAL_LINKAGE
- std::string_view to_sv(const utf8_codepoint& cp) noexcept
- {
- if (cp.value <= U'\x1F')
- return impl::control_char_escapes[cp.value];
- else if (cp.value == U'\x7F')
- return "\\u007F"sv;
- else
- return std::string_view{ cp.bytes, cp.count };
- }
-
- TOML_PURE_GETTER
- TOML_INTERNAL_LINKAGE
- std::string_view to_sv(const utf8_codepoint* cp) noexcept
- {
- if (cp)
- return to_sv(*cp);
- return ""sv;
- }
-
- struct escaped_codepoint
- {
- const utf8_codepoint& cp;
- };
-
- template <typename T>
- TOML_ATTR(nonnull)
- TOML_INTERNAL_LINKAGE
- void concatenate(char*& write_pos, char* const buf_end, const T& arg) noexcept
- {
- if TOML_UNLIKELY(write_pos >= buf_end)
- return;
-
- using arg_type = impl::remove_cvref<T>;
-
- // string views
- if constexpr (std::is_same_v<arg_type, std::string_view>)
- {
- const auto max_chars = static_cast<size_t>(buf_end - write_pos);
- const auto len = max_chars < arg.length() ? max_chars : arg.length();
- std::memcpy(write_pos, arg.data(), len);
- write_pos += len;
- }
-
- // doubles
- else if constexpr (std::is_same_v<arg_type, double>)
- {
+TOML_ANON_NAMESPACE_START {
+ template <typename... T>
+ TOML_CONST_GETTER TOML_INTERNAL_LINKAGE constexpr bool is_match(char32_t codepoint,
+ T... vals) noexcept {
+ static_assert((std::is_same_v<char32_t, T> && ...));
+ return ((codepoint == vals) || ...);
+ }
+
+ template <uint64_t>
+ struct parse_integer_traits;
+ template <>
+ struct parse_integer_traits<2> {
+ static constexpr auto scope_qualifier = "binary integer"sv;
+ static constexpr auto is_digit = impl::is_binary_digit;
+ static constexpr auto is_signed = false;
+ static constexpr auto max_digits = 63;
+ static constexpr auto prefix_codepoint = U'b';
+ static constexpr auto prefix = "b"sv;
+ static constexpr auto full_prefix = "0b"sv;
+ };
+ template <>
+ struct parse_integer_traits<8> {
+ static constexpr auto scope_qualifier = "octal integer"sv;
+ static constexpr auto is_digit = impl::is_octal_digit;
+ static constexpr auto is_signed = false;
+ static constexpr auto max_digits = 21; // strlen("777777777777777777777")
+ static constexpr auto prefix_codepoint = U'o';
+ static constexpr auto prefix = "o"sv;
+ static constexpr auto full_prefix = "0o"sv;
+ };
+ template <>
+ struct parse_integer_traits<10> {
+ static constexpr auto scope_qualifier = "decimal integer"sv;
+ static constexpr auto is_digit = impl::is_decimal_digit;
+ static constexpr auto is_signed = true;
+ static constexpr auto max_digits = 19; // strlen("9223372036854775807")
+ static constexpr auto full_prefix = ""sv;
+ };
+ template <>
+ struct parse_integer_traits<16> {
+ static constexpr auto scope_qualifier = "hexadecimal integer"sv;
+ static constexpr auto is_digit = impl::is_hexadecimal_digit;
+ static constexpr auto is_signed = false;
+ static constexpr auto max_digits = 16; // strlen("7FFFFFFFFFFFFFFF")
+ static constexpr auto prefix_codepoint = U'x';
+ static constexpr auto prefix = "x"sv;
+ static constexpr auto full_prefix = "0x"sv;
+ };
+
+ TOML_PURE_GETTER
+ TOML_INTERNAL_LINKAGE
+ std::string_view to_sv(node_type val) noexcept {
+ return impl::node_type_friendly_names[impl::unwrap_enum(val)];
+ }
+
+ TOML_PURE_GETTER
+ TOML_INTERNAL_LINKAGE
+ std::string_view to_sv(const std::string& str) noexcept {
+ return std::string_view{str};
+ }
+
+ TOML_CONST_GETTER
+ TOML_INTERNAL_LINKAGE
+ std::string_view to_sv(bool val) noexcept {
+ using namespace std::string_view_literals;
+
+ return val ? "true"sv : "false"sv;
+ }
+
+ TOML_PURE_GETTER
+ TOML_INTERNAL_LINKAGE
+ std::string_view to_sv(const utf8_codepoint& cp) noexcept {
+ if (cp.value <= U'\x1F')
+ return impl::control_char_escapes[cp.value];
+ else if (cp.value == U'\x7F')
+ return "\\u007F"sv;
+ else
+ return std::string_view{cp.bytes, cp.count};
+ }
+
+ TOML_PURE_GETTER
+ TOML_INTERNAL_LINKAGE
+ std::string_view to_sv(const utf8_codepoint* cp) noexcept {
+ if (cp) return to_sv(*cp);
+ return ""sv;
+ }
+
+ struct escaped_codepoint {
+ const utf8_codepoint& cp;
+ };
+
+ template <typename T>
+ TOML_ATTR(nonnull)
+ TOML_INTERNAL_LINKAGE void concatenate(char*& write_pos, char* const buf_end,
+ const T& arg) noexcept {
+ if TOML_UNLIKELY (write_pos >= buf_end) return;
+
+ using arg_type = impl::remove_cvref<T>;
+
+ // string views
+ if constexpr (std::is_same_v<arg_type, std::string_view>) {
+ const auto max_chars = static_cast<size_t>(buf_end - write_pos);
+ const auto len = max_chars < arg.length() ? max_chars : arg.length();
+ std::memcpy(write_pos, arg.data(), len);
+ write_pos += len;
+ }
+
+ // doubles
+ else if constexpr (std::is_same_v<arg_type, double>) {
#if TOML_FLOAT_CHARCONV
- const auto result = std::to_chars(write_pos, buf_end, arg);
- write_pos = result.ptr;
+ const auto result = std::to_chars(write_pos, buf_end, arg);
+ write_pos = result.ptr;
#else
- std::ostringstream ss;
- ss.imbue(std::locale::classic());
- ss.precision(std::numeric_limits<arg_type>::max_digits10);
- ss << arg;
- concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
+ std::ostringstream ss;
+ ss.imbue(std::locale::classic());
+ ss.precision(std::numeric_limits<arg_type>::max_digits10);
+ ss << arg;
+ concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
#endif
- }
+ }
- // 64-bit integers
- else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>)
- {
+ // 64-bit integers
+ else if constexpr (impl::is_one_of<arg_type, int64_t, uint64_t>) {
#if TOML_INT_CHARCONV
- const auto result = std::to_chars(write_pos, buf_end, arg);
- write_pos = result.ptr;
+ const auto result = std::to_chars(write_pos, buf_end, arg);
+ write_pos = result.ptr;
#else
- std::ostringstream ss;
- ss.imbue(std::locale::classic());
- using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>;
- ss << static_cast<cast_type>(arg);
- concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
+ std::ostringstream ss;
+ ss.imbue(std::locale::classic());
+ using cast_type = std::conditional_t<std::is_signed_v<arg_type>, int64_t, uint64_t>;
+ ss << static_cast<cast_type>(arg);
+ concatenate(write_pos, buf_end, to_sv(std::move(ss).str()));
#endif
- }
-
- // escaped_codepoint
- else if constexpr (std::is_same_v<arg_type, escaped_codepoint>)
- {
- if (arg.cp.value <= U'\x7F')
- concatenate(write_pos, buf_end, to_sv(arg.cp));
- else
- {
- auto val = static_cast<uint_least32_t>(arg.cp.value);
- const auto digits = val > 0xFFFFu ? 8u : 4u;
- constexpr auto mask = uint_least32_t{ 0xFu };
- char buf[10] = { '\\', digits > 4 ? 'U' : 'u' };
- for (auto i = 2u + digits; i-- > 2u;)
- {
- const auto hexdig = val & mask;
- buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig));
- val >>= 4;
- }
- concatenate(write_pos, buf_end, std::string_view{ buf, digits + 2u });
- }
- }
-
- // all other floats (fallback - coerce to double)
- else if constexpr (std::is_floating_point_v<arg_type>)
- concatenate(write_pos, buf_end, static_cast<double>(arg));
-
- // all other integers (fallback - coerce to (u)int64_t)
- else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>)
- {
- using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>;
- concatenate(write_pos, buf_end, static_cast<cast_type>(arg));
- }
-
- else
- {
- static_assert(
- impl::always_false<T>,
- "concatenate() inputs are limited to std::string_views, integers, floats, and escaped_codepoint");
- }
- }
-
- struct error_builder
- {
- static constexpr std::size_t buf_size = 512;
- char buf[buf_size];
- char* write_pos = buf;
- char* const max_write_pos = buf + (buf_size - std::size_t{ 1 }); // allow for null terminator
-
- TOML_NODISCARD_CTOR
- error_builder(std::string_view scope) noexcept
- {
- concatenate(write_pos, max_write_pos, "Error while parsing "sv);
- concatenate(write_pos, max_write_pos, scope);
- concatenate(write_pos, max_write_pos, ": "sv);
- }
-
- template <typename T>
- void append(const T& arg) noexcept
- {
- concatenate(write_pos, max_write_pos, arg);
- }
-
- TOML_RETURNS_BY_THROWING
- auto finish(const source_position& pos, const source_path_ptr& source_path) const
- {
- *write_pos = '\0';
+ }
+
+ // escaped_codepoint
+ else if constexpr (std::is_same_v<arg_type, escaped_codepoint>) {
+ if (arg.cp.value <= U'\x7F')
+ concatenate(write_pos, buf_end, to_sv(arg.cp));
+ else {
+ auto val = static_cast<uint_least32_t>(arg.cp.value);
+ const auto digits = val > 0xFFFFu ? 8u : 4u;
+ constexpr auto mask = uint_least32_t{0xFu};
+ char buf[10] = {'\\', digits > 4 ? 'U' : 'u'};
+ for (auto i = 2u + digits; i-- > 2u;) {
+ const auto hexdig = val & mask;
+ buf[i] = static_cast<char>(hexdig >= 0xAu ? ('A' + (hexdig - 0xAu)) : ('0' + hexdig));
+ val >>= 4;
+ }
+ concatenate(write_pos, buf_end, std::string_view{buf, digits + 2u});
+ }
+ }
+
+ // all other floats (fallback - coerce to double)
+ else if constexpr (std::is_floating_point_v<arg_type>)
+ concatenate(write_pos, buf_end, static_cast<double>(arg));
+
+ // all other integers (fallback - coerce to (u)int64_t)
+ else if constexpr (std::is_arithmetic_v<arg_type> && std::is_integral_v<arg_type>) {
+ using cast_type = std::conditional_t<std::is_unsigned_v<arg_type>, uint64_t, int64_t>;
+ concatenate(write_pos, buf_end, static_cast<cast_type>(arg));
+ }
+
+ else {
+ static_assert(impl::always_false<T>,
+ "concatenate() inputs are limited to std::string_views, integers, floats, and "
+ "escaped_codepoint");
+ }
+ }
+
+ struct error_builder {
+ static constexpr std::size_t buf_size = 512;
+ char buf[buf_size];
+ char* write_pos = buf;
+ char* const max_write_pos = buf + (buf_size - std::size_t{1}); // allow for null terminator
+
+ TOML_NODISCARD_CTOR
+ error_builder(std::string_view scope) noexcept {
+ concatenate(write_pos, max_write_pos, "Error while parsing "sv);
+ concatenate(write_pos, max_write_pos, scope);
+ concatenate(write_pos, max_write_pos, ": "sv);
+ }
+
+ template <typename T>
+ void append(const T& arg) noexcept {
+ concatenate(write_pos, max_write_pos, arg);
+ }
+
+ TOML_RETURNS_BY_THROWING
+ auto finish(const source_position& pos, const source_path_ptr& source_path) const {
+ *write_pos = '\0';
#if TOML_EXCEPTIONS
- throw parse_error{ buf, pos, source_path };
+ throw parse_error{buf, pos, source_path};
#else
- return parse_error{ std::string(buf, static_cast<size_t>(write_pos - buf)), pos, source_path };
+ return parse_error{std::string(buf, static_cast<size_t>(write_pos - buf)), pos, source_path};
#endif
- }
-
- TOML_DELETE_DEFAULTS(error_builder);
- };
-
- struct parse_scope
- {
- std::string_view& storage_;
- std::string_view parent_;
-
- TOML_NODISCARD_CTOR
- explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept
- : storage_{ current_scope },
- parent_{ current_scope }
- {
- storage_ = new_scope;
- }
-
- ~parse_scope() noexcept
- {
- storage_ = parent_;
- }
-
- TOML_DELETE_DEFAULTS(parse_scope);
- };
+ }
+
+ TOML_DELETE_DEFAULTS(error_builder);
+ };
+
+ struct parse_scope {
+ std::string_view& storage_;
+ std::string_view parent_;
+
+ TOML_NODISCARD_CTOR
+ explicit parse_scope(std::string_view& current_scope, std::string_view new_scope) noexcept
+ : storage_{current_scope}, parent_{current_scope} {
+ storage_ = new_scope;
+ }
+
+ ~parse_scope() noexcept { storage_ = parent_; }
+
+ TOML_DELETE_DEFAULTS(parse_scope);
+ };
#define push_parse_scope_2(scope, line) parse_scope ps_##line(current_scope, scope)
#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
-#define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__)
-
- struct parse_key_buffer
- {
- std::string buffer;
- std::vector<std::pair<size_t, size_t>> segments;
- std::vector<source_position> starts;
- std::vector<source_position> ends;
-
- void clear() noexcept
- {
- buffer.clear();
- segments.clear();
- starts.clear();
- ends.clear();
- }
-
- void push_back(std::string_view segment, source_position b, source_position e)
- {
- segments.push_back({ buffer.length(), segment.length() });
- buffer.append(segment);
- starts.push_back(b);
- ends.push_back(e);
- }
-
- TOML_PURE_INLINE_GETTER
- std::string_view operator[](size_t i) const noexcept
- {
- return std::string_view{ buffer.c_str() + segments[i].first, segments[i].second };
- }
-
- TOML_PURE_INLINE_GETTER
- std::string_view back() const noexcept
- {
- return (*this)[segments.size() - 1u];
- }
-
- TOML_PURE_INLINE_GETTER
- bool empty() const noexcept
- {
- return segments.empty();
- }
-
- TOML_PURE_INLINE_GETTER
- size_t size() const noexcept
- {
- return segments.size();
- }
- };
-
- struct depth_counter_scope
- {
- size_t& depth_;
-
- TOML_NODISCARD_CTOR
- explicit depth_counter_scope(size_t& depth) noexcept //
- : depth_{ depth }
- {
- depth_++;
- }
-
- ~depth_counter_scope() noexcept
- {
- depth_--;
- }
-
- TOML_DELETE_DEFAULTS(depth_counter_scope);
- };
-
- struct parsed_string
- {
- std::string_view value;
- bool was_multi_line;
- };
-
- struct table_vector_scope
- {
- std::vector<table*>& tables;
-
- TOML_NODISCARD_CTOR
- explicit table_vector_scope(std::vector<table*>& tables_, table& tbl) //
- : tables{ tables_ }
- {
- tables.push_back(&tbl);
- }
-
- ~table_vector_scope() noexcept
- {
- tables.pop_back();
- }
-
- TOML_DELETE_DEFAULTS(table_vector_scope);
- };
+#define push_parse_scope(scope) push_parse_scope_1(scope, __LINE__)
+
+ struct parse_key_buffer {
+ std::string buffer;
+ std::vector<std::pair<size_t, size_t>> segments;
+ std::vector<source_position> starts;
+ std::vector<source_position> ends;
+
+ void clear() noexcept {
+ buffer.clear();
+ segments.clear();
+ starts.clear();
+ ends.clear();
+ }
+
+ void push_back(std::string_view segment, source_position b, source_position e) {
+ segments.push_back({buffer.length(), segment.length()});
+ buffer.append(segment);
+ starts.push_back(b);
+ ends.push_back(e);
+ }
+
+ TOML_PURE_INLINE_GETTER
+ std::string_view operator[](size_t i) const noexcept {
+ return std::string_view{buffer.c_str() + segments[i].first, segments[i].second};
+ }
+
+ TOML_PURE_INLINE_GETTER
+ std::string_view back() const noexcept { return (*this)[segments.size() - 1u]; }
+
+ TOML_PURE_INLINE_GETTER
+ bool empty() const noexcept { return segments.empty(); }
+
+ TOML_PURE_INLINE_GETTER
+ size_t size() const noexcept { return segments.size(); }
+ };
+
+ struct depth_counter_scope {
+ size_t& depth_;
+
+ TOML_NODISCARD_CTOR
+ explicit depth_counter_scope(size_t& depth) noexcept //
+ : depth_{depth} {
+ depth_++;
+ }
+
+ ~depth_counter_scope() noexcept { depth_--; }
+
+ TOML_DELETE_DEFAULTS(depth_counter_scope);
+ };
+
+ struct parsed_string {
+ std::string_view value;
+ bool was_multi_line;
+ };
+
+ struct table_vector_scope {
+ std::vector<table*>& tables;
+
+ TOML_NODISCARD_CTOR
+ explicit table_vector_scope(std::vector<table*>& tables_, table& tbl) //
+ : tables{tables_} {
+ tables.push_back(&tbl);
+ }
+
+ ~table_vector_scope() noexcept { tables.pop_back(); }
+
+ TOML_DELETE_DEFAULTS(table_vector_scope);
+ };
}
TOML_ANON_NAMESPACE_END;
-#if 1 // parser helper macros
+#if 1 // parser helper macros
// Q: "what the fuck is this? MACROS????"
// A: The parser needs to work in exceptionless mode (returning error objects directly)
-// and exception mode (reporting parse failures by throwing). Two totally different control flows.
-// These macros encapsulate the differences between the two modes so I can write code code
+// and exception mode (reporting parse failures by throwing). Two totally different control
+// flows. These macros encapsulate the differences between the two modes so I can write code code
// as though I was only targeting one mode and not want yeet myself into the sun.
-// They're all #undef'd at the bottom of the parser's implementation so they should be harmless outside
-// of toml++.
+// They're all #undef'd at the bottom of the parser's implementation so they should be harmless
+// outside of toml++.
-#define is_eof() !cp
+#define is_eof() !cp
#define assert_not_eof() TOML_ASSERT_ASSUME(cp != nullptr)
-#define return_if_eof(...) \
- do \
- { \
- if TOML_UNLIKELY(is_eof()) \
- return __VA_ARGS__; \
- } \
- while (false)
+#define return_if_eof(...) \
+ do { \
+ if TOML_UNLIKELY (is_eof()) return __VA_ARGS__; \
+ } while (false)
#if TOML_EXCEPTIONS
-#define is_error() false
-#define return_after_error(...) TOML_UNREACHABLE
-#define assert_not_error() static_assert(true)
-#define return_if_error(...) static_assert(true)
+#define is_error() false
+#define return_after_error(...) TOML_UNREACHABLE
+#define assert_not_error() static_assert(true)
+#define return_if_error(...) static_assert(true)
#define return_if_error_or_eof(...) return_if_eof(__VA_ARGS__)
#else
-#define is_error() !!err
+#define is_error() !!err
#define return_after_error(...) return __VA_ARGS__
-#define assert_not_error() TOML_ASSERT(!is_error())
-#define return_if_error(...) \
- do \
- { \
- if TOML_UNLIKELY(is_error()) \
- return __VA_ARGS__; \
- } \
- while (false)
-#define return_if_error_or_eof(...) \
- do \
- { \
- if TOML_UNLIKELY(is_eof() || is_error()) \
- return __VA_ARGS__; \
- } \
- while (false)
+#define assert_not_error() TOML_ASSERT(!is_error())
+#define return_if_error(...) \
+ do { \
+ if TOML_UNLIKELY (is_error()) return __VA_ARGS__; \
+ } while (false)
+#define return_if_error_or_eof(...) \
+ do { \
+ if TOML_UNLIKELY (is_eof() || is_error()) return __VA_ARGS__; \
+ } while (false)
#endif
#if defined(TOML_BREAK_AT_PARSE_ERRORS) && TOML_BREAK_AT_PARSE_ERRORS
@@ -1016,2711 +851,2483 @@ TOML_ANON_NAMESPACE_END;
#define parse_error_break() static_assert(true)
#endif
-#define set_error_and_return(ret, ...) \
- do \
- { \
- if (!is_error()) \
- set_error(__VA_ARGS__); \
- return_after_error(ret); \
- } \
- while (false)
+#define set_error_and_return(ret, ...) \
+ do { \
+ if (!is_error()) set_error(__VA_ARGS__); \
+ return_after_error(ret); \
+ } while (false)
#define set_error_and_return_default(...) set_error_and_return({}, __VA_ARGS__)
-#define set_error_and_return_if_eof(...) \
- do \
- { \
- if TOML_UNLIKELY(is_eof()) \
- set_error_and_return(__VA_ARGS__, "encountered end-of-file"sv); \
- } \
- while (false)
-
-#define advance_and_return_if_error(...) \
- do \
- { \
- assert_not_eof(); \
- advance(); \
- return_if_error(__VA_ARGS__); \
- } \
- while (false)
-
-#define advance_and_return_if_error_or_eof(...) \
- do \
- { \
- assert_not_eof(); \
- advance(); \
- return_if_error(__VA_ARGS__); \
- set_error_and_return_if_eof(__VA_ARGS__); \
- } \
- while (false)
-
-#endif // parser helper macros
-
-TOML_IMPL_NAMESPACE_START
-{
- TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex);
-
- class parser
- {
- private:
- static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;
-
- utf8_buffered_reader reader;
- table root;
- source_position prev_pos = { 1, 1 };
- const utf8_codepoint* cp = {};
- std::vector<table*> implicit_tables;
- std::vector<table*> dotted_key_tables;
- std::vector<table*> open_inline_tables;
- std::vector<array*> table_arrays;
- parse_key_buffer key_buffer;
- std::string string_buffer;
- std::string recording_buffer; // for diagnostics
- bool recording = false, recording_whitespace = true;
- std::string_view current_scope;
- size_t nested_values = {};
+#define set_error_and_return_if_eof(...) \
+ do { \
+ if TOML_UNLIKELY (is_eof()) set_error_and_return(__VA_ARGS__, "encountered end-of-file"sv); \
+ } while (false)
+
+#define advance_and_return_if_error(...) \
+ do { \
+ assert_not_eof(); \
+ advance(); \
+ return_if_error(__VA_ARGS__); \
+ } while (false)
+
+#define advance_and_return_if_error_or_eof(...) \
+ do { \
+ assert_not_eof(); \
+ advance(); \
+ return_if_error(__VA_ARGS__); \
+ set_error_and_return_if_eof(__VA_ARGS__); \
+ } while (false)
+
+#endif // parser helper macros
+
+TOML_IMPL_NAMESPACE_START {
+ TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex);
+
+ class parser {
+ private:
+ static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;
+
+ utf8_buffered_reader reader;
+ table root;
+ source_position prev_pos = {1, 1};
+ const utf8_codepoint* cp = {};
+ std::vector<table*> implicit_tables;
+ std::vector<table*> dotted_key_tables;
+ std::vector<table*> open_inline_tables;
+ std::vector<array*> table_arrays;
+ parse_key_buffer key_buffer;
+ std::string string_buffer;
+ std::string recording_buffer; // for diagnostics
+ bool recording = false, recording_whitespace = true;
+ std::string_view current_scope;
+ size_t nested_values = {};
#if !TOML_EXCEPTIONS
- mutable optional<parse_error> err;
+ mutable optional<parse_error> err;
#endif
- TOML_NODISCARD
- source_position current_position(source_index fallback_offset = 0) const noexcept
- {
- if (!is_eof())
- return cp->position;
- return { prev_pos.line, static_cast<source_index>(prev_pos.column + fallback_offset) };
- }
+ TOML_NODISCARD
+ source_position current_position(source_index fallback_offset = 0) const noexcept {
+ if (!is_eof()) return cp->position;
+ return {prev_pos.line, static_cast<source_index>(prev_pos.column + fallback_offset)};
+ }
- template <typename... T>
- TOML_RETURNS_BY_THROWING
- TOML_NEVER_INLINE
- void set_error_at(source_position pos, const T&... reason) const
- {
- static_assert(sizeof...(T) > 0);
- return_if_error();
+ template <typename... T>
+ TOML_RETURNS_BY_THROWING TOML_NEVER_INLINE void set_error_at(source_position pos,
+ const T&... reason) const {
+ static_assert(sizeof...(T) > 0);
+ return_if_error();
- error_builder builder{ current_scope };
- (builder.append(reason), ...);
+ error_builder builder{current_scope};
+ (builder.append(reason), ...);
- parse_error_break();
+ parse_error_break();
#if TOML_EXCEPTIONS
- builder.finish(pos, reader.source_path());
+ builder.finish(pos, reader.source_path());
#else
- err.emplace(builder.finish(pos, reader.source_path()));
+ err.emplace(builder.finish(pos, reader.source_path()));
#endif
- }
+ }
- template <typename... T>
- TOML_RETURNS_BY_THROWING
- void set_error(const T&... reason) const
- {
- set_error_at(current_position(1), reason...);
- }
+ template <typename... T>
+ TOML_RETURNS_BY_THROWING void set_error(const T&... reason) const {
+ set_error_at(current_position(1), reason...);
+ }
- void go_back(size_t count = 1) noexcept
- {
- return_if_error();
- TOML_ASSERT_ASSUME(count);
+ void go_back(size_t count = 1) noexcept {
+ return_if_error();
+ TOML_ASSERT_ASSUME(count);
- cp = reader.step_back(count);
- prev_pos = cp->position;
- }
+ cp = reader.step_back(count);
+ prev_pos = cp->position;
+ }
- void advance()
- {
- return_if_error();
- assert_not_eof();
+ void advance() {
+ return_if_error();
+ assert_not_eof();
- prev_pos = cp->position;
- cp = reader.read_next();
+ prev_pos = cp->position;
+ cp = reader.read_next();
#if !TOML_EXCEPTIONS
- if (reader.error())
- {
- err = std::move(reader.error());
- return;
- }
+ if (reader.error()) {
+ err = std::move(reader.error());
+ return;
+ }
#endif
- if (recording && !is_eof())
- {
- if (recording_whitespace || !is_whitespace(*cp))
- recording_buffer.append(cp->bytes, cp->count);
- }
- }
-
- void start_recording(bool include_current = true) noexcept
- {
- return_if_error();
-
- recording = true;
- recording_whitespace = true;
- recording_buffer.clear();
- if (include_current && !is_eof())
- recording_buffer.append(cp->bytes, cp->count);
- }
-
- void stop_recording(size_t pop_bytes = 0) noexcept
- {
- return_if_error();
-
- recording = false;
- if (pop_bytes)
- {
- if (pop_bytes >= recording_buffer.length())
- recording_buffer.clear();
- else if (pop_bytes == 1u)
- recording_buffer.pop_back();
- else
- recording_buffer.erase(recording_buffer.begin()
- + static_cast<ptrdiff_t>(recording_buffer.length() - pop_bytes),
- recording_buffer.end());
- }
- }
-
- bool consume_leading_whitespace()
- {
- return_if_error_or_eof({});
-
- bool consumed = false;
- while (!is_eof() && is_horizontal_whitespace(*cp))
- {
- if TOML_UNLIKELY(!is_ascii_horizontal_whitespace(*cp))
- set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{ *cp }, "'"sv);
-
- consumed = true;
- advance_and_return_if_error({});
- }
- return consumed;
- }
-
- bool consume_line_break()
- {
- return_if_error_or_eof({});
-
- if TOML_UNLIKELY(is_match(*cp, U'\v', U'\f'))
- set_error_and_return_default(
- R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv);
-
- if (*cp == U'\r')
- {
- advance_and_return_if_error({}); // skip \r
-
- if TOML_UNLIKELY(is_eof())
- set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv);
-
- if TOML_UNLIKELY(*cp != U'\n')
- set_error_and_return_default("expected '\\n' after '\\r', saw '"sv,
- escaped_codepoint{ *cp },
- "'"sv);
- }
- else if (*cp != U'\n')
- return false;
-
- advance_and_return_if_error({}); // skip \n
- return true;
- }
-
- bool consume_rest_of_line()
- {
- return_if_error_or_eof({});
-
- do
- {
- if (is_ascii_vertical_whitespace(*cp))
- return consume_line_break();
- else
- advance();
- return_if_error({});
- }
- while (!is_eof());
-
- return true;
- }
-
- bool consume_comment()
- {
- return_if_error_or_eof({});
-
- if (*cp != U'#')
- return false;
-
- push_parse_scope("comment"sv);
-
- advance_and_return_if_error({}); // skip the '#'
-
- while (!is_eof())
- {
- if (consume_line_break())
- return true;
- return_if_error({});
+ if (recording && !is_eof()) {
+ if (recording_whitespace || !is_whitespace(*cp))
+ recording_buffer.append(cp->bytes, cp->count);
+ }
+ }
+
+ void start_recording(bool include_current = true) noexcept {
+ return_if_error();
+
+ recording = true;
+ recording_whitespace = true;
+ recording_buffer.clear();
+ if (include_current && !is_eof()) recording_buffer.append(cp->bytes, cp->count);
+ }
+
+ void stop_recording(size_t pop_bytes = 0) noexcept {
+ return_if_error();
+
+ recording = false;
+ if (pop_bytes) {
+ if (pop_bytes >= recording_buffer.length())
+ recording_buffer.clear();
+ else if (pop_bytes == 1u)
+ recording_buffer.pop_back();
+ else
+ recording_buffer.erase(recording_buffer.begin() +
+ static_cast<ptrdiff_t>(recording_buffer.length() - pop_bytes),
+ recording_buffer.end());
+ }
+ }
+
+ bool consume_leading_whitespace() {
+ return_if_error_or_eof({});
+
+ bool consumed = false;
+ while (!is_eof() && is_horizontal_whitespace(*cp)) {
+ if TOML_UNLIKELY (!is_ascii_horizontal_whitespace(*cp))
+ set_error_and_return_default("expected space or tab, saw '"sv, escaped_codepoint{*cp},
+ "'"sv);
+
+ consumed = true;
+ advance_and_return_if_error({});
+ }
+ return consumed;
+ }
+
+ bool consume_line_break() {
+ return_if_error_or_eof({});
+
+ if TOML_UNLIKELY (is_match(*cp, U'\v', U'\f'))
+ set_error_and_return_default(
+ R"(vertical tabs '\v' and form-feeds '\f' are not legal line breaks in TOML)"sv);
+
+ if (*cp == U'\r') {
+ advance_and_return_if_error({}); // skip \r
+
+ if TOML_UNLIKELY (is_eof())
+ set_error_and_return_default("expected '\\n' after '\\r', saw EOF"sv);
+
+ if TOML_UNLIKELY (*cp != U'\n')
+ set_error_and_return_default("expected '\\n' after '\\r', saw '"sv,
+ escaped_codepoint{*cp}, "'"sv);
+ } else if (*cp != U'\n')
+ return false;
+
+ advance_and_return_if_error({}); // skip \n
+ return true;
+ }
+
+ bool consume_rest_of_line() {
+ return_if_error_or_eof({});
+
+ do {
+ if (is_ascii_vertical_whitespace(*cp))
+ return consume_line_break();
+ else
+ advance();
+ return_if_error({});
+ } while (!is_eof());
+
+ return true;
+ }
+
+ bool consume_comment() {
+ return_if_error_or_eof({});
+
+ if (*cp != U'#') return false;
+
+ push_parse_scope("comment"sv);
+
+ advance_and_return_if_error({}); // skip the '#'
+
+ while (!is_eof()) {
+ if (consume_line_break()) return true;
+ return_if_error({});
#if TOML_LANG_AT_LEAST(1, 0, 0)
- // toml/issues/567 (disallow non-TAB control characters in comments)
- if TOML_UNLIKELY(is_nontab_control_character(*cp))
- set_error_and_return_default(
- "control characters other than TAB (U+0009) are explicitly prohibited in comments"sv);
+ // toml/issues/567 (disallow non-TAB control characters in comments)
+ if TOML_UNLIKELY (is_nontab_control_character(*cp))
+ set_error_and_return_default(
+ "control characters other than TAB (U+0009) are explicitly prohibited in comments"sv);
- // toml/pull/720 (disallow surrogates in comments)
- else if TOML_UNLIKELY(is_unicode_surrogate(*cp))
- set_error_and_return_default(
- "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited in comments"sv);
+ // toml/pull/720 (disallow surrogates in comments)
+ else if TOML_UNLIKELY (is_unicode_surrogate(*cp))
+ set_error_and_return_default(
+ "unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited in comments"sv);
#endif
- advance_and_return_if_error({});
- }
-
- return true;
- }
-
- TOML_NODISCARD
- bool consume_expected_sequence(std::u32string_view seq)
- {
- return_if_error({});
- TOML_ASSERT(!seq.empty());
-
- for (auto c : seq)
- {
- set_error_and_return_if_eof({});
- if (*cp != c)
- return false;
- advance_and_return_if_error({});
- }
- return true;
- }
-
- template <typename T>
- TOML_NODISCARD
- bool consume_digit_sequence(T* digits, size_t len)
- {
- return_if_error({});
- TOML_ASSERT_ASSUME(digits);
- TOML_ASSERT_ASSUME(len);
-
- for (size_t i = 0; i < len; i++)
- {
- set_error_and_return_if_eof({});
- if (!is_decimal_digit(*cp))
- return false;
-
- digits[i] = static_cast<T>(*cp - U'0');
- advance_and_return_if_error({});
- }
- return true;
- }
-
- template <typename T>
- TOML_NODISCARD
- size_t consume_variable_length_digit_sequence(T* buffer, size_t max_len)
- {
- return_if_error({});
- TOML_ASSERT_ASSUME(buffer);
- TOML_ASSERT_ASSUME(max_len);
-
- size_t i = {};
- for (; i < max_len; i++)
- {
- if (is_eof() || !is_decimal_digit(*cp))
- break;
-
- buffer[i] = static_cast<T>(*cp - U'0');
- advance_and_return_if_error({});
- }
- return i;
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- std::string_view parse_basic_string(bool multi_line)
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(*cp == U'"');
- push_parse_scope("string"sv);
-
- // skip the '"'
- advance_and_return_if_error_or_eof({});
-
- // multi-line strings ignore a single line ending right at the beginning
- if (multi_line)
- {
- consume_line_break();
- return_if_error({});
- set_error_and_return_if_eof({});
- }
-
- auto& str = string_buffer;
- str.clear();
- bool escaped = false;
- bool skipping_whitespace = false;
- do
- {
- if (escaped)
- {
- escaped = false;
-
- // handle 'line ending slashes' in multi-line mode
- if (multi_line && is_whitespace(*cp))
- {
- consume_leading_whitespace();
-
- if TOML_UNLIKELY(!consume_line_break())
- set_error_and_return_default(
- "line-ending backslashes must be the last non-whitespace character on the line"sv);
-
- skipping_whitespace = true;
- return_if_error({});
- continue;
- }
-
- bool skip_escaped_codepoint = true;
- assert_not_eof();
- switch (const auto escaped_codepoint = *cp)
- {
- // 'regular' escape codes
- case U'b': str += '\b'; break;
- case U'f': str += '\f'; break;
- case U'n': str += '\n'; break;
- case U'r': str += '\r'; break;
- case U't': str += '\t'; break;
- case U'"': str += '"'; break;
- case U'\\': str += '\\'; break;
-
-#if TOML_LANG_UNRELEASED // toml/pull/790 (\e shorthand for \x1B)
- case U'e': str += '\x1B'; break;
+ advance_and_return_if_error({});
+ }
+
+ return true;
+ }
+
+ TOML_NODISCARD
+ bool consume_expected_sequence(std::u32string_view seq) {
+ return_if_error({});
+ TOML_ASSERT(!seq.empty());
+
+ for (auto c : seq) {
+ set_error_and_return_if_eof({});
+ if (*cp != c) return false;
+ advance_and_return_if_error({});
+ }
+ return true;
+ }
+
+ template <typename T>
+ TOML_NODISCARD bool consume_digit_sequence(T* digits, size_t len) {
+ return_if_error({});
+ TOML_ASSERT_ASSUME(digits);
+ TOML_ASSERT_ASSUME(len);
+
+ for (size_t i = 0; i < len; i++) {
+ set_error_and_return_if_eof({});
+ if (!is_decimal_digit(*cp)) return false;
+
+ digits[i] = static_cast<T>(*cp - U'0');
+ advance_and_return_if_error({});
+ }
+ return true;
+ }
+
+ template <typename T>
+ TOML_NODISCARD size_t consume_variable_length_digit_sequence(T* buffer, size_t max_len) {
+ return_if_error({});
+ TOML_ASSERT_ASSUME(buffer);
+ TOML_ASSERT_ASSUME(max_len);
+
+ size_t i = {};
+ for (; i < max_len; i++) {
+ if (is_eof() || !is_decimal_digit(*cp)) break;
+
+ buffer[i] = static_cast<T>(*cp - U'0');
+ advance_and_return_if_error({});
+ }
+ return i;
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ std::string_view parse_basic_string(bool multi_line) {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(*cp == U'"');
+ push_parse_scope("string"sv);
+
+ // skip the '"'
+ advance_and_return_if_error_or_eof({});
+
+ // multi-line strings ignore a single line ending right at the beginning
+ if (multi_line) {
+ consume_line_break();
+ return_if_error({});
+ set_error_and_return_if_eof({});
+ }
+
+ auto& str = string_buffer;
+ str.clear();
+ bool escaped = false;
+ bool skipping_whitespace = false;
+ do {
+ if (escaped) {
+ escaped = false;
+
+ // handle 'line ending slashes' in multi-line mode
+ if (multi_line && is_whitespace(*cp)) {
+ consume_leading_whitespace();
+
+ if TOML_UNLIKELY (!consume_line_break())
+ set_error_and_return_default(
+ "line-ending backslashes must be the last non-whitespace character on the line"sv);
+
+ skipping_whitespace = true;
+ return_if_error({});
+ continue;
+ }
+
+ bool skip_escaped_codepoint = true;
+ assert_not_eof();
+ switch (const auto escaped_codepoint = *cp) {
+ // 'regular' escape codes
+ case U'b':
+ str += '\b';
+ break;
+ case U'f':
+ str += '\f';
+ break;
+ case U'n':
+ str += '\n';
+ break;
+ case U'r':
+ str += '\r';
+ break;
+ case U't':
+ str += '\t';
+ break;
+ case U'"':
+ str += '"';
+ break;
+ case U'\\':
+ str += '\\';
+ break;
+
+#if TOML_LANG_UNRELEASED // toml/pull/790 (\e shorthand for \x1B)
+ case U'e':
+ str += '\x1B';
+ break;
#else
- case U'e':
- set_error_and_return_default(
- "escape sequence '\\e' is not supported in TOML 1.0.0 and earlier"sv);
+ case U'e':
+ set_error_and_return_default(
+ "escape sequence '\\e' is not supported in TOML 1.0.0 and earlier"sv);
#endif
-#if TOML_LANG_UNRELEASED // toml/pull/796 (\xHH unicode scalar sequences)
- case U'x': [[fallthrough]];
+#if TOML_LANG_UNRELEASED // toml/pull/796 (\xHH unicode scalar sequences)
+ case U'x':
+ [[fallthrough]];
#else
- case U'x':
- set_error_and_return_default(
- "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv);
+ case U'x':
+ set_error_and_return_default(
+ "escape sequence '\\x' is not supported in TOML 1.0.0 and earlier"sv);
#endif
- // unicode scalar sequences
- case U'u': [[fallthrough]];
- case U'U':
- {
- push_parse_scope("unicode scalar sequence"sv);
- advance_and_return_if_error_or_eof({});
- skip_escaped_codepoint = false;
-
- uint32_t place_value =
- escaped_codepoint == U'U' ? 0x10000000u : (escaped_codepoint == U'u' ? 0x1000u : 0x10u);
- uint32_t sequence_value{};
- while (place_value)
- {
- set_error_and_return_if_eof({});
-
- if TOML_UNLIKELY(!is_hexadecimal_digit(*cp))
- set_error_and_return_default("expected hex digit, saw '"sv, to_sv(*cp), "'"sv);
-
- sequence_value += place_value * hex_to_dec(*cp);
- place_value /= 16u;
- advance_and_return_if_error({});
- }
-
- if TOML_UNLIKELY(is_unicode_surrogate(sequence_value))
- set_error_and_return_default(
- "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
- else if TOML_UNLIKELY(sequence_value > 0x10FFFFu)
- set_error_and_return_default("values greater than U+10FFFF are invalid"sv);
-
- if (sequence_value < 0x80)
- {
- str += static_cast<char>(sequence_value);
- }
- else if (sequence_value < 0x800u)
- {
- str += static_cast<char>((sequence_value >> 6) | 0xC0u);
- str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
- }
- else if (sequence_value < 0x10000u)
- {
- str += static_cast<char>((sequence_value >> 12) | 0xE0u);
- str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
- str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
- }
- else if (sequence_value < 0x110000u)
- {
- str += static_cast<char>((sequence_value >> 18) | 0xF0u);
- str += static_cast<char>(((sequence_value >> 12) & 0x3Fu) | 0x80u);
- str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
- str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
- }
- break;
- }
-
- // ???
- TOML_UNLIKELY_CASE
- default: set_error_and_return_default("unknown escape sequence '\\"sv, to_sv(*cp), "'"sv);
- }
-
- if (skip_escaped_codepoint)
- advance_and_return_if_error_or_eof({});
- }
- else
- {
- // handle closing delimiters
- if (*cp == U'"')
- {
- if (multi_line)
- {
- size_t lookaheads = {};
- size_t consecutive_delimiters = 1;
- do
- {
- advance_and_return_if_error({});
- lookaheads++;
- if (!is_eof() && *cp == U'"')
- consecutive_delimiters++;
- else
- break;
- }
- while (lookaheads < 4u);
-
- switch (consecutive_delimiters)
- {
- // """ " (one quote somewhere in a ML string)
- case 1:
- str += '"';
- skipping_whitespace = false;
- continue;
-
- // """ "" (two quotes somewhere in a ML string)
- case 2:
- str.append("\"\""sv);
- skipping_whitespace = false;
- continue;
-
- // """ """ (the end of the string)
- case 3: return str;
-
- // """ """" (one at the end of the string)
- case 4: str += '"'; return str;
-
- // """ """"" (two quotes at the end of the string)
- case 5:
- str.append("\"\""sv);
- advance_and_return_if_error({}); // skip the last '"'
- return str;
-
- default: TOML_UNREACHABLE;
- }
- }
- else
- {
- advance_and_return_if_error({}); // skip the closing delimiter
- return str;
- }
- }
-
- // handle escapes
- else if (*cp == U'\\')
- {
- advance_and_return_if_error_or_eof({}); // skip the '\'
- skipping_whitespace = false;
- escaped = true;
- continue;
- }
-
- // handle line endings in multi-line mode
- if (multi_line && is_ascii_vertical_whitespace(*cp))
- {
- consume_line_break();
- return_if_error({});
- if (!skipping_whitespace)
- str += '\n';
- continue;
- }
-
- // handle control characters
- if TOML_UNLIKELY(is_nontab_control_character(*cp))
- set_error_and_return_default(
- "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv);
+ // unicode scalar sequences
+ case U'u':
+ [[fallthrough]];
+ case U'U': {
+ push_parse_scope("unicode scalar sequence"sv);
+ advance_and_return_if_error_or_eof({});
+ skip_escaped_codepoint = false;
+
+ uint32_t place_value = escaped_codepoint == U'U'
+ ? 0x10000000u
+ : (escaped_codepoint == U'u' ? 0x1000u : 0x10u);
+ uint32_t sequence_value{};
+ while (place_value) {
+ set_error_and_return_if_eof({});
+
+ if TOML_UNLIKELY (!is_hexadecimal_digit(*cp))
+ set_error_and_return_default("expected hex digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ sequence_value += place_value * hex_to_dec(*cp);
+ place_value /= 16u;
+ advance_and_return_if_error({});
+ }
+
+ if TOML_UNLIKELY (is_unicode_surrogate(sequence_value))
+ set_error_and_return_default(
+ "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
+ else if TOML_UNLIKELY (sequence_value > 0x10FFFFu)
+ set_error_and_return_default("values greater than U+10FFFF are invalid"sv);
+
+ if (sequence_value < 0x80) {
+ str += static_cast<char>(sequence_value);
+ } else if (sequence_value < 0x800u) {
+ str += static_cast<char>((sequence_value >> 6) | 0xC0u);
+ str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
+ } else if (sequence_value < 0x10000u) {
+ str += static_cast<char>((sequence_value >> 12) | 0xE0u);
+ str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
+ str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
+ } else if (sequence_value < 0x110000u) {
+ str += static_cast<char>((sequence_value >> 18) | 0xF0u);
+ str += static_cast<char>(((sequence_value >> 12) & 0x3Fu) | 0x80u);
+ str += static_cast<char>(((sequence_value >> 6) & 0x3Fu) | 0x80u);
+ str += static_cast<char>((sequence_value & 0x3Fu) | 0x80u);
+ }
+ break;
+ }
+
+ // ???
+ TOML_UNLIKELY_CASE
+ default:
+ set_error_and_return_default("unknown escape sequence '\\"sv, to_sv(*cp), "'"sv);
+ }
+
+ if (skip_escaped_codepoint) advance_and_return_if_error_or_eof({});
+ } else {
+ // handle closing delimiters
+ if (*cp == U'"') {
+ if (multi_line) {
+ size_t lookaheads = {};
+ size_t consecutive_delimiters = 1;
+ do {
+ advance_and_return_if_error({});
+ lookaheads++;
+ if (!is_eof() && *cp == U'"')
+ consecutive_delimiters++;
+ else
+ break;
+ } while (lookaheads < 4u);
+
+ switch (consecutive_delimiters) {
+ // """ " (one quote somewhere in a ML string)
+ case 1:
+ str += '"';
+ skipping_whitespace = false;
+ continue;
+
+ // """ "" (two quotes somewhere in a ML string)
+ case 2:
+ str.append("\"\""sv);
+ skipping_whitespace = false;
+ continue;
+
+ // """ """ (the end of the string)
+ case 3:
+ return str;
+
+ // """ """" (one at the end of the string)
+ case 4:
+ str += '"';
+ return str;
+
+ // """ """"" (two quotes at the end of the string)
+ case 5:
+ str.append("\"\""sv);
+ advance_and_return_if_error({}); // skip the last '"'
+ return str;
+
+ default:
+ TOML_UNREACHABLE;
+ }
+ } else {
+ advance_and_return_if_error({}); // skip the closing delimiter
+ return str;
+ }
+ }
+
+ // handle escapes
+ else if (*cp == U'\\') {
+ advance_and_return_if_error_or_eof({}); // skip the '\'
+ skipping_whitespace = false;
+ escaped = true;
+ continue;
+ }
+
+ // handle line endings in multi-line mode
+ if (multi_line && is_ascii_vertical_whitespace(*cp)) {
+ consume_line_break();
+ return_if_error({});
+ if (!skipping_whitespace) str += '\n';
+ continue;
+ }
+
+ // handle control characters
+ if TOML_UNLIKELY (is_nontab_control_character(*cp))
+ set_error_and_return_default(
+ "unescaped control characters other than TAB (U+0009) are explicitly prohibited"sv);
#if TOML_LANG_AT_LEAST(1, 0, 0)
- // handle surrogates in strings
- if TOML_UNLIKELY(is_unicode_surrogate(*cp))
- set_error_and_return_default(
- "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv);
+ // handle surrogates in strings
+ if TOML_UNLIKELY (is_unicode_surrogate(*cp))
+ set_error_and_return_default(
+ "unescaped unicode surrogates (U+D800 to U+DFFF) are explicitly prohibited"sv);
#endif
- if (multi_line)
- {
- if (!skipping_whitespace || !is_horizontal_whitespace(*cp))
- {
- skipping_whitespace = false;
- str.append(cp->bytes, cp->count);
- }
- }
- else
- str.append(cp->bytes, cp->count);
-
- advance_and_return_if_error({});
- }
- }
- while (!is_eof());
-
- set_error_and_return_default("encountered end-of-file"sv);
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- std::string_view parse_literal_string(bool multi_line)
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(*cp == U'\'');
- push_parse_scope("literal string"sv);
-
- // skip the delimiter
- advance_and_return_if_error_or_eof({});
-
- // multi-line strings ignore a single line ending right at the beginning
- if (multi_line)
- {
- consume_line_break();
- return_if_error({});
- set_error_and_return_if_eof({});
- }
-
- auto& str = string_buffer;
- str.clear();
- do
- {
- return_if_error({});
-
- // handle closing delimiters
- if (*cp == U'\'')
- {
- if (multi_line)
- {
- size_t lookaheads = {};
- size_t consecutive_delimiters = 1;
- do
- {
- advance_and_return_if_error({});
- lookaheads++;
- if (!is_eof() && *cp == U'\'')
- consecutive_delimiters++;
- else
- break;
- }
- while (lookaheads < 4u);
-
- switch (consecutive_delimiters)
- {
- // ''' ' (one quote somewhere in a ML string)
- case 1: str += '\''; continue;
-
- // ''' '' (two quotes somewhere in a ML string)
- case 2: str.append("''"sv); continue;
-
- // ''' ''' (the end of the string)
- case 3: return str;
-
- // ''' '''' (one at the end of the string)
- case 4: str += '\''; return str;
-
- // ''' ''''' (two quotes at the end of the string)
- case 5:
- str.append("''"sv);
- advance_and_return_if_error({}); // skip the last '
- return str;
-
- default: TOML_UNREACHABLE;
- }
- }
- else
- {
- advance_and_return_if_error({}); // skip the closing delimiter
- return str;
- }
- }
-
- // handle line endings in multi-line mode
- if (multi_line && is_ascii_vertical_whitespace(*cp))
- {
- consume_line_break();
- return_if_error({});
- str += '\n';
- continue;
- }
-
- // handle control characters
- if TOML_UNLIKELY(is_nontab_control_character(*cp))
- set_error_and_return_default(
- "control characters other than TAB (U+0009) are explicitly prohibited"sv);
+ if (multi_line) {
+ if (!skipping_whitespace || !is_horizontal_whitespace(*cp)) {
+ skipping_whitespace = false;
+ str.append(cp->bytes, cp->count);
+ }
+ } else
+ str.append(cp->bytes, cp->count);
+
+ advance_and_return_if_error({});
+ }
+ } while (!is_eof());
+
+ set_error_and_return_default("encountered end-of-file"sv);
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ std::string_view parse_literal_string(bool multi_line) {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(*cp == U'\'');
+ push_parse_scope("literal string"sv);
+
+ // skip the delimiter
+ advance_and_return_if_error_or_eof({});
+
+ // multi-line strings ignore a single line ending right at the beginning
+ if (multi_line) {
+ consume_line_break();
+ return_if_error({});
+ set_error_and_return_if_eof({});
+ }
+
+ auto& str = string_buffer;
+ str.clear();
+ do {
+ return_if_error({});
+
+ // handle closing delimiters
+ if (*cp == U'\'') {
+ if (multi_line) {
+ size_t lookaheads = {};
+ size_t consecutive_delimiters = 1;
+ do {
+ advance_and_return_if_error({});
+ lookaheads++;
+ if (!is_eof() && *cp == U'\'')
+ consecutive_delimiters++;
+ else
+ break;
+ } while (lookaheads < 4u);
+
+ switch (consecutive_delimiters) {
+ // ''' ' (one quote somewhere in a ML string)
+ case 1:
+ str += '\'';
+ continue;
+
+ // ''' '' (two quotes somewhere in a ML string)
+ case 2:
+ str.append("''"sv);
+ continue;
+
+ // ''' ''' (the end of the string)
+ case 3:
+ return str;
+
+ // ''' '''' (one at the end of the string)
+ case 4:
+ str += '\'';
+ return str;
+
+ // ''' ''''' (two quotes at the end of the string)
+ case 5:
+ str.append("''"sv);
+ advance_and_return_if_error({}); // skip the last '
+ return str;
+
+ default:
+ TOML_UNREACHABLE;
+ }
+ } else {
+ advance_and_return_if_error({}); // skip the closing delimiter
+ return str;
+ }
+ }
+
+ // handle line endings in multi-line mode
+ if (multi_line && is_ascii_vertical_whitespace(*cp)) {
+ consume_line_break();
+ return_if_error({});
+ str += '\n';
+ continue;
+ }
+
+ // handle control characters
+ if TOML_UNLIKELY (is_nontab_control_character(*cp))
+ set_error_and_return_default(
+ "control characters other than TAB (U+0009) are explicitly prohibited"sv);
#if TOML_LANG_AT_LEAST(1, 0, 0)
- // handle surrogates in strings
- if TOML_UNLIKELY(is_unicode_surrogate(*cp))
- set_error_and_return_default("unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
+ // handle surrogates in strings
+ if TOML_UNLIKELY (is_unicode_surrogate(*cp))
+ set_error_and_return_default(
+ "unicode surrogates (U+D800 - U+DFFF) are explicitly prohibited"sv);
#endif
- str.append(cp->bytes, cp->count);
- advance_and_return_if_error({});
- }
- while (!is_eof());
-
- set_error_and_return_default("encountered end-of-file"sv);
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- parsed_string parse_string()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_string_delimiter(*cp));
- push_parse_scope("string"sv);
-
- // get the first three characters to determine the string type
- const auto first = cp->value;
- advance_and_return_if_error_or_eof({});
- const auto second = cp->value;
- advance_and_return_if_error({});
- const auto third = cp ? cp->value : U'\0';
-
- // if we were eof at the third character then first and second need to be
- // the same string character (otherwise it's an unterminated string)
- if (is_eof())
- {
- if (second == first)
- return {};
-
- set_error_and_return_default("encountered end-of-file"sv);
- }
-
- // if the first three characters are all the same string delimiter then
- // it's a multi-line string.
- else if (first == second && first == third)
- {
- return { first == U'\'' ? parse_literal_string(true) : parse_basic_string(true), true };
- }
-
- // otherwise it's just a regular string.
- else
- {
- // step back two characters so that the current
- // character is the string delimiter
- go_back(2u);
-
- return { first == U'\'' ? parse_literal_string(false) : parse_basic_string(false), false };
- }
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- std::string_view parse_bare_key_segment()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_bare_key_character(*cp));
-
- string_buffer.clear();
-
- while (!is_eof())
- {
- if (!is_bare_key_character(*cp))
- break;
-
- string_buffer.append(cp->bytes, cp->count);
- advance_and_return_if_error({});
- }
-
- return string_buffer;
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- bool parse_boolean()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_match(*cp, U't', U'f', U'T', U'F'));
- push_parse_scope("boolean"sv);
-
- start_recording(true);
- auto result = is_match(*cp, U't', U'T');
- if (!consume_expected_sequence(result ? U"true"sv : U"false"sv))
- set_error_and_return_default("expected '"sv,
- to_sv(result),
- "', saw '"sv,
- to_sv(recording_buffer),
- "'"sv);
- stop_recording();
-
- if (cp && !is_value_terminator(*cp))
- set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
-
- return result;
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- double parse_inf_or_nan()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_match(*cp, U'i', U'n', U'I', U'N', U'+', U'-'));
- push_parse_scope("floating-point"sv);
-
- start_recording(true);
- const bool negative = *cp == U'-';
- if (negative || *cp == U'+')
- advance_and_return_if_error_or_eof({});
-
- const bool inf = is_match(*cp, U'i', U'I');
- if (!consume_expected_sequence(inf ? U"inf"sv : U"nan"sv))
- set_error_and_return_default("expected '"sv,
- inf ? "inf"sv : "nan"sv,
- "', saw '"sv,
- to_sv(recording_buffer),
- "'"sv);
- stop_recording();
-
- if (cp && !is_value_terminator(*cp))
- set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
-
- return inf ? (negative ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity())
- : std::numeric_limits<double>::quiet_NaN();
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- double parse_float()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_match(*cp, U'+', U'-', U'.') || is_decimal_digit(*cp));
- push_parse_scope("floating-point"sv);
-
- // sign
- const int sign = *cp == U'-' ? -1 : 1;
- if (is_match(*cp, U'+', U'-'))
- advance_and_return_if_error_or_eof({});
-
- // consume value chars
- char chars[utf8_buffered_reader::max_history_length];
- size_t length = {};
- const utf8_codepoint* prev = {};
- bool seen_decimal = false, seen_exponent = false;
- char first_integer_part = '\0';
- while (!is_eof() && !is_value_terminator(*cp))
- {
- if (*cp == U'_')
- {
- if (!prev || !is_decimal_digit(*prev))
- set_error_and_return_default("underscores may only follow digits"sv);
-
- prev = cp;
- advance_and_return_if_error_or_eof({});
- continue;
- }
- else if TOML_UNLIKELY(prev && *prev == U'_' && !is_decimal_digit(*cp))
- set_error_and_return_default("underscores must be followed by digits"sv);
- else if TOML_UNLIKELY(length == sizeof(chars))
- set_error_and_return_default("exceeds length limit of "sv,
- sizeof(chars),
- " digits"sv,
- (seen_exponent ? ""sv : " (consider using exponent notation)"sv));
- else if (*cp == U'.')
- {
- // .1
- // -.1
- // +.1 (no integer part)
- if (!first_integer_part)
- set_error_and_return_default("expected decimal digit, saw '.'"sv);
-
- // 1.0e+.10 (exponent cannot have '.')
- else if (seen_exponent)
- set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv);
-
- // 1.0.e+.10
- // 1..0
- // (multiple '.')
- else if (seen_decimal)
- set_error_and_return_default("expected decimal digit or exponent, saw '.'"sv);
-
- seen_decimal = true;
- }
- else if (is_match(*cp, U'e', U'E'))
- {
- if (prev && !is_decimal_digit(*prev))
- set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
-
- // 1.0ee+10 (multiple 'e')
- else if (seen_exponent)
- set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
-
- seen_decimal = true; // implied
- seen_exponent = true;
- }
- else if (is_match(*cp, U'+', U'-'))
- {
- // 1.-0 (sign in mantissa)
- if (!seen_exponent)
- set_error_and_return_default("expected decimal digit or '.', saw '"sv, to_sv(*cp), "'"sv);
-
- // 1.0e1-0 (misplaced exponent sign)
- else if (!is_match(*prev, U'e', U'E'))
- set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
- }
- else if (is_decimal_digit(*cp))
- {
- if (!seen_decimal)
- {
- if (!first_integer_part)
- first_integer_part = static_cast<char>(cp->bytes[0]);
- else if (first_integer_part == '0')
- set_error_and_return_default("leading zeroes are prohibited"sv);
- }
- }
- else
- set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
-
- chars[length++] = static_cast<char>(cp->bytes[0]);
- prev = cp;
- advance_and_return_if_error({});
- }
-
- // sanity-check ending state
- if (prev)
- {
- if (*prev == U'_')
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("underscores must be followed by digits"sv);
- }
- else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.'))
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
- }
- }
-
- // convert to double
- double result;
+ str.append(cp->bytes, cp->count);
+ advance_and_return_if_error({});
+ } while (!is_eof());
+
+ set_error_and_return_default("encountered end-of-file"sv);
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ parsed_string parse_string() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_string_delimiter(*cp));
+ push_parse_scope("string"sv);
+
+ // get the first three characters to determine the string type
+ const auto first = cp->value;
+ advance_and_return_if_error_or_eof({});
+ const auto second = cp->value;
+ advance_and_return_if_error({});
+ const auto third = cp ? cp->value : U'\0';
+
+ // if we were eof at the third character then first and second need to be
+ // the same string character (otherwise it's an unterminated string)
+ if (is_eof()) {
+ if (second == first) return {};
+
+ set_error_and_return_default("encountered end-of-file"sv);
+ }
+
+ // if the first three characters are all the same string delimiter then
+ // it's a multi-line string.
+ else if (first == second && first == third) {
+ return {first == U'\'' ? parse_literal_string(true) : parse_basic_string(true), true};
+ }
+
+ // otherwise it's just a regular string.
+ else {
+ // step back two characters so that the current
+ // character is the string delimiter
+ go_back(2u);
+
+ return {first == U'\'' ? parse_literal_string(false) : parse_basic_string(false), false};
+ }
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ std::string_view parse_bare_key_segment() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_bare_key_character(*cp));
+
+ string_buffer.clear();
+
+ while (!is_eof()) {
+ if (!is_bare_key_character(*cp)) break;
+
+ string_buffer.append(cp->bytes, cp->count);
+ advance_and_return_if_error({});
+ }
+
+ return string_buffer;
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ bool parse_boolean() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_match(*cp, U't', U'f', U'T', U'F'));
+ push_parse_scope("boolean"sv);
+
+ start_recording(true);
+ auto result = is_match(*cp, U't', U'T');
+ if (!consume_expected_sequence(result ? U"true"sv : U"false"sv))
+ set_error_and_return_default("expected '"sv, to_sv(result), "', saw '"sv,
+ to_sv(recording_buffer), "'"sv);
+ stop_recording();
+
+ if (cp && !is_value_terminator(*cp))
+ set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
+
+ return result;
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ double parse_inf_or_nan() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_match(*cp, U'i', U'n', U'I', U'N', U'+', U'-'));
+ push_parse_scope("floating-point"sv);
+
+ start_recording(true);
+ const bool negative = *cp == U'-';
+ if (negative || *cp == U'+') advance_and_return_if_error_or_eof({});
+
+ const bool inf = is_match(*cp, U'i', U'I');
+ if (!consume_expected_sequence(inf ? U"inf"sv : U"nan"sv))
+ set_error_and_return_default("expected '"sv, inf ? "inf"sv : "nan"sv, "', saw '"sv,
+ to_sv(recording_buffer), "'"sv);
+ stop_recording();
+
+ if (cp && !is_value_terminator(*cp))
+ set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
+
+ return inf ? (negative ? -std::numeric_limits<double>::infinity()
+ : std::numeric_limits<double>::infinity())
+ : std::numeric_limits<double>::quiet_NaN();
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ double parse_float() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_match(*cp, U'+', U'-', U'.') || is_decimal_digit(*cp));
+ push_parse_scope("floating-point"sv);
+
+ // sign
+ const int sign = *cp == U'-' ? -1 : 1;
+ if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
+
+ // consume value chars
+ char chars[utf8_buffered_reader::max_history_length];
+ size_t length = {};
+ const utf8_codepoint* prev = {};
+ bool seen_decimal = false, seen_exponent = false;
+ char first_integer_part = '\0';
+ while (!is_eof() && !is_value_terminator(*cp)) {
+ if (*cp == U'_') {
+ if (!prev || !is_decimal_digit(*prev))
+ set_error_and_return_default("underscores may only follow digits"sv);
+
+ prev = cp;
+ advance_and_return_if_error_or_eof({});
+ continue;
+ } else if TOML_UNLIKELY (prev && *prev == U'_' && !is_decimal_digit(*cp))
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ else if TOML_UNLIKELY (length == sizeof(chars))
+ set_error_and_return_default(
+ "exceeds length limit of "sv, sizeof(chars), " digits"sv,
+ (seen_exponent ? ""sv : " (consider using exponent notation)"sv));
+ else if (*cp == U'.') {
+ // .1
+ // -.1
+ // +.1 (no integer part)
+ if (!first_integer_part)
+ set_error_and_return_default("expected decimal digit, saw '.'"sv);
+
+ // 1.0e+.10 (exponent cannot have '.')
+ else if (seen_exponent)
+ set_error_and_return_default("expected exponent decimal digit or sign, saw '.'"sv);
+
+ // 1.0.e+.10
+ // 1..0
+ // (multiple '.')
+ else if (seen_decimal)
+ set_error_and_return_default("expected decimal digit or exponent, saw '.'"sv);
+
+ seen_decimal = true;
+ } else if (is_match(*cp, U'e', U'E')) {
+ if (prev && !is_decimal_digit(*prev))
+ set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ // 1.0ee+10 (multiple 'e')
+ else if (seen_exponent)
+ set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ seen_decimal = true; // implied
+ seen_exponent = true;
+ } else if (is_match(*cp, U'+', U'-')) {
+ // 1.-0 (sign in mantissa)
+ if (!seen_exponent)
+ set_error_and_return_default("expected decimal digit or '.', saw '"sv, to_sv(*cp),
+ "'"sv);
+
+ // 1.0e1-0 (misplaced exponent sign)
+ else if (!is_match(*prev, U'e', U'E'))
+ set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
+ } else if (is_decimal_digit(*cp)) {
+ if (!seen_decimal) {
+ if (!first_integer_part)
+ first_integer_part = static_cast<char>(cp->bytes[0]);
+ else if (first_integer_part == '0')
+ set_error_and_return_default("leading zeroes are prohibited"sv);
+ }
+ } else
+ set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ chars[length++] = static_cast<char>(cp->bytes[0]);
+ prev = cp;
+ advance_and_return_if_error({});
+ }
+
+ // sanity-check ending state
+ if (prev) {
+ if (*prev == U'_') {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ } else if (is_match(*prev, U'e', U'E', U'+', U'-', U'.')) {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("expected decimal digit, saw '"sv, to_sv(*cp), "'"sv);
+ }
+ }
+
+ // convert to double
+ double result;
#if TOML_FLOAT_CHARCONV
- {
- auto fc_result = std::from_chars(chars, chars + length, result);
- switch (fc_result.ec)
- {
- TOML_LIKELY_CASE
- case std::errc{}: // ok
- return result * sign;
-
- case std::errc::invalid_argument:
- set_error_and_return_default("'"sv,
- std::string_view{ chars, length },
- "' could not be interpreted as a value"sv);
- break;
-
- case std::errc::result_out_of_range:
- set_error_and_return_default("'"sv,
- std::string_view{ chars, length },
- "' is not representable in 64 bits"sv);
- break;
-
- default: //??
- set_error_and_return_default("an unspecified error occurred while trying to interpret '"sv,
- std::string_view{ chars, length },
- "' as a value"sv);
- }
- }
+ {
+ auto fc_result = std::from_chars(chars, chars + length, result);
+ switch (fc_result.ec) {
+ TOML_LIKELY_CASE
+ case std::errc{}: // ok
+ return result * sign;
+
+ case std::errc::invalid_argument:
+ set_error_and_return_default("'"sv, std::string_view{chars, length},
+ "' could not be interpreted as a value"sv);
+ break;
+
+ case std::errc::result_out_of_range:
+ set_error_and_return_default("'"sv, std::string_view{chars, length},
+ "' is not representable in 64 bits"sv);
+ break;
+
+ default: //??
+ set_error_and_return_default(
+ "an unspecified error occurred while trying to interpret '"sv,
+ std::string_view{chars, length}, "' as a value"sv);
+ }
+ }
#else
- {
- std::stringstream ss;
- ss.imbue(std::locale::classic());
- ss.write(chars, static_cast<std::streamsize>(length));
- if ((ss >> result))
- return result * sign;
- else
- set_error_and_return_default("'"sv,
- std::string_view{ chars, length },
- "' could not be interpreted as a value"sv);
- }
+ {
+ std::stringstream ss;
+ ss.imbue(std::locale::classic());
+ ss.write(chars, static_cast<std::streamsize>(length));
+ if ((ss >> result))
+ return result * sign;
+ else
+ set_error_and_return_default("'"sv, std::string_view{chars, length},
+ "' could not be interpreted as a value"sv);
+ }
#endif
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- double parse_hex_float()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_match(*cp, U'0', U'+', U'-'));
- push_parse_scope("hexadecimal floating-point"sv);
-
-#if TOML_LANG_UNRELEASED // toml/issues/562 (hexfloats)
-
- // sign
- const int sign = *cp == U'-' ? -1 : 1;
- if (is_match(*cp, U'+', U'-'))
- advance_and_return_if_error_or_eof({});
-
- // '0'
- if (*cp != U'0')
- set_error_and_return_default(" expected '0', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // 'x' or 'X'
- if (!is_match(*cp, U'x', U'X'))
- set_error_and_return_default("expected 'x' or 'X', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS>
-
- // consume value fragments
- struct fragment
- {
- char chars[24];
- size_t length;
- double value;
- };
- fragment fragments[] = {
- {}, // mantissa, whole part
- {}, // mantissa, fractional part
- {} // exponent
- };
- fragment* current_fragment = fragments;
- const utf8_codepoint* prev = {};
- int exponent_sign = 1;
- while (!is_eof() && !is_value_terminator(*cp))
- {
- if (*cp == U'_')
- {
- if (!prev || !is_hexadecimal_digit(*prev))
- set_error_and_return_default("underscores may only follow digits"sv);
-
- prev = cp;
- advance_and_return_if_error_or_eof({});
- continue;
- }
- else if (prev && *prev == U'_' && !is_hexadecimal_digit(*cp))
- set_error_and_return_default("underscores must be followed by digits"sv);
- else if (*cp == U'.')
- {
- // 0x10.0p-.0 (exponent cannot have '.')
- if (current_fragment == fragments + 2)
- set_error_and_return_default("expected exponent digit or sign, saw '.'"sv);
-
- // 0x10.0.p-0 (multiple '.')
- else if (current_fragment == fragments + 1)
- set_error_and_return_default("expected hexadecimal digit or exponent, saw '.'"sv);
-
- else
- current_fragment++;
- }
- else if (is_match(*cp, U'p', U'P'))
- {
- // 0x10.0pp-0 (multiple 'p')
- if (current_fragment == fragments + 2)
- set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv);
-
- // 0x.p-0 (mantissa is just '.')
- else if (fragments[0].length == 0u && fragments[1].length == 0u)
- set_error_and_return_default("expected hexadecimal digit, saw '"sv, to_sv(*cp), "'"sv);
-
- else
- current_fragment = fragments + 2;
- }
- else if (is_match(*cp, U'+', U'-'))
- {
- // 0x-10.0p-0 (sign in mantissa)
- if (current_fragment != fragments + 2)
- set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv);
-
- // 0x10.0p0- (misplaced exponent sign)
- else if (!is_match(*prev, U'p', U'P'))
- set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
-
- else
- exponent_sign = *cp == U'-' ? -1 : 1;
- }
- else if (current_fragment < fragments + 2 && !is_hexadecimal_digit(*cp))
- set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp), "'"sv);
- else if (current_fragment == fragments + 2 && !is_decimal_digit(*cp))
- set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp), "'"sv);
- else if (current_fragment->length == sizeof(fragment::chars))
- set_error_and_return_default("fragment exceeeds maximum length of "sv,
- sizeof(fragment::chars),
- " characters"sv);
- else
- current_fragment->chars[current_fragment->length++] = static_cast<char>(cp->bytes[0]);
-
- prev = cp;
- advance_and_return_if_error({});
- }
-
- // sanity-check ending state
- if (current_fragment != fragments + 2 || current_fragment->length == 0u)
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("missing exponent"sv);
- }
- else if (prev && *prev == U'_')
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("underscores must be followed by digits"sv);
- }
-
- // calculate values for the three fragments
- for (int fragment_idx = 0; fragment_idx < 3; fragment_idx++)
- {
- auto& f = fragments[fragment_idx];
- const uint32_t base = fragment_idx == 2 ? 10u : 16u;
-
- // left-trim zeroes
- const char* c = f.chars;
- size_t sig = {};
- while (f.length && *c == '0')
- {
- f.length--;
- c++;
- sig++;
- }
- if (!f.length)
- continue;
-
- // calculate value
- auto place = 1u;
- for (size_t i = 0; i < f.length - 1u; i++)
- place *= base;
- uint32_t val{};
- while (place)
- {
- if (base == 16)
- val += place * hex_to_dec(*c);
- else
- val += place * static_cast<uint32_t>(*c - '0');
- if (fragment_idx == 1)
- sig++;
- c++;
- place /= base;
- }
- f.value = static_cast<double>(val);
-
- // shift the fractional part
- if (fragment_idx == 1)
- {
- while (sig--)
- f.value /= base;
- }
- }
-
- return (fragments[0].value + fragments[1].value) * pow(2.0, fragments[2].value * exponent_sign) * sign;
-
-#else // !TOML_LANG_UNRELEASED
-
- set_error_and_return_default("hexadecimal floating-point values are not supported "
- "in TOML 1.0.0 and earlier"sv);
-
-#endif // !TOML_LANG_UNRELEASED
- }
-
- template <uint64_t base>
- TOML_NODISCARD
- TOML_NEVER_INLINE
- int64_t parse_integer()
- {
- return_if_error({});
- assert_not_eof();
- using traits = parse_integer_traits<base>;
- push_parse_scope(traits::scope_qualifier);
-
- [[maybe_unused]] int64_t sign = 1;
- if constexpr (traits::is_signed)
- {
- sign = *cp == U'-' ? -1 : 1;
- if (is_match(*cp, U'+', U'-'))
- advance_and_return_if_error_or_eof({});
- }
-
- if constexpr (base == 10)
- {
- if (!traits::is_digit(*cp))
- set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp), "'"sv);
- }
- else
- {
- // '0'
- if (*cp != U'0')
- set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // 'b', 'o', 'x'
- if (*cp != traits::prefix_codepoint)
- set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- if (!traits::is_digit(*cp))
- set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
- }
-
- // consume digits
- char digits[utf8_buffered_reader::max_history_length];
- size_t length = {};
- const utf8_codepoint* prev = {};
- while (!is_eof() && !is_value_terminator(*cp))
- {
- if (*cp == U'_')
- {
- if (!prev || !traits::is_digit(*prev))
- set_error_and_return_default("underscores may only follow digits"sv);
-
- prev = cp;
- advance_and_return_if_error_or_eof({});
- continue;
- }
- else if TOML_UNLIKELY(prev && *prev == U'_' && !traits::is_digit(*cp))
- set_error_and_return_default("underscores must be followed by digits"sv);
- else if TOML_UNLIKELY(!traits::is_digit(*cp))
- set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
- else if TOML_UNLIKELY(length == sizeof(digits))
- set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv);
- else
- digits[length++] = static_cast<char>(cp->bytes[0]);
-
- prev = cp;
- advance_and_return_if_error({});
- }
-
- // sanity check ending state
- if (prev && *prev == U'_')
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("underscores must be followed by digits"sv);
- }
-
- // single digits can be converted trivially
- if (length == 1u)
- {
- int64_t result;
-
- if constexpr (base == 16)
- result = static_cast<int64_t>(hex_to_dec(digits[0]));
- else
- result = static_cast<int64_t>(digits[0] - '0');
-
- if constexpr (traits::is_signed)
- result *= sign;
-
- return result;
- }
-
- // bin, oct and hex allow leading zeroes so trim them first
- const char* end = digits + length;
- const char* msd = digits;
- if constexpr (base != 10)
- {
- while (msd < end && *msd == '0')
- msd++;
- if (msd == end)
- return 0ll;
- }
-
- // decimal integers do not allow leading zeroes
- else
- {
- if TOML_UNLIKELY(digits[0] == '0')
- set_error_and_return_default("leading zeroes are prohibited"sv);
- }
-
- // range check
- if TOML_UNLIKELY(static_cast<size_t>(end - msd) > traits::max_digits)
- set_error_and_return_default("'"sv,
- traits::full_prefix,
- std::string_view{ digits, length },
- "' is not representable in 64 bits"sv);
-
- // do the thing
- {
- uint64_t result = {};
- {
- uint64_t power = 1;
- while (--end >= msd)
- {
- if constexpr (base == 16)
- result += power * hex_to_dec(*end);
- else
- result += power * static_cast<uint64_t>(*end - '0');
-
- power *= base;
- }
- }
-
- // range check
- static constexpr auto i64_max = static_cast<uint64_t>((std::numeric_limits<int64_t>::max)());
- if TOML_UNLIKELY(result > i64_max + (sign < 0 ? 1u : 0u))
- set_error_and_return_default("'"sv,
- traits::full_prefix,
- std::string_view{ digits, length },
- "' is not representable in 64 bits"sv);
-
- if constexpr (traits::is_signed)
- {
- // avoid signed multiply UB when parsing INT64_MIN
- if TOML_UNLIKELY(sign < 0 && result == i64_max + 1u)
- return (std::numeric_limits<int64_t>::min)();
-
- return static_cast<int64_t>(result) * sign;
- }
- else
- return static_cast<int64_t>(result);
- }
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- date parse_date(bool part_of_datetime = false)
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
- push_parse_scope("date"sv);
-
- // "YYYY"
- uint32_t digits[4];
- if (!consume_digit_sequence(digits, 4u))
- set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
- const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u + digits[0] * 1000u;
- const auto is_leap_year = (year % 4u == 0u) && ((year % 100u != 0u) || (year % 400u == 0u));
- set_error_and_return_if_eof({});
-
- // '-'
- if (*cp != U'-')
- set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "MM"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
- const auto month = digits[1] + digits[0] * 10u;
- if (month == 0u || month > 12u)
- set_error_and_return_default("expected month between 1 and 12 (inclusive), saw "sv, month);
- const auto max_days_in_month = month == 2u
- ? (is_leap_year ? 29u : 28u)
- : (month == 4u || month == 6u || month == 9u || month == 11u ? 30u : 31u);
- set_error_and_return_if_eof({});
-
- // '-'
- if (*cp != U'-')
- set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "DD"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
- const auto day = digits[1] + digits[0] * 10u;
- if (day == 0u || day > max_days_in_month)
- set_error_and_return_default("expected day between 1 and "sv,
- max_days_in_month,
- " (inclusive), saw "sv,
- day);
-
- if (!part_of_datetime && !is_eof() && !is_value_terminator(*cp))
- set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
-
- return { year, month, day };
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- time parse_time(bool part_of_datetime = false)
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
- push_parse_scope("time"sv);
-
- static constexpr size_t max_digits = 64; // far more than necessary but needed to allow fractional
- // millisecond truncation per the spec
- uint32_t digits[max_digits];
-
- // "HH"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
- const auto hour = digits[1] + digits[0] * 10u;
- if (hour > 23u)
- set_error_and_return_default("expected hour between 0 to 59 (inclusive), saw "sv, hour);
- set_error_and_return_if_eof({});
-
- // ':'
- if (*cp != U':')
- set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "MM"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
- const auto minute = digits[1] + digits[0] * 10u;
- if (minute > 59u)
- set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute);
- auto time = toml::time{ hour, minute };
-
- // ':'
- if constexpr (TOML_LANG_UNRELEASED) // toml/issues/671 (allow omission of seconds)
- {
- if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
- return time;
- }
- else
- set_error_and_return_if_eof({});
- if (*cp != U':')
- set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "SS"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
- const auto second = digits[1] + digits[0] * 10u;
- if (second > 59u)
- set_error_and_return_default("expected second between 0 and 59 (inclusive), saw "sv, second);
- time.second = static_cast<decltype(time.second)>(second);
-
- // '.' (early-exiting is allowed; fractional is optional)
- if (is_eof() || is_value_terminator(*cp) || (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
- return time;
- if (*cp != U'.')
- set_error_and_return_default("expected '.', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "FFFFFFFFF"
- size_t digit_count = consume_variable_length_digit_sequence(digits, max_digits);
- if (!digit_count)
- {
- set_error_and_return_if_eof({});
- set_error_and_return_default("expected fractional digits, saw '"sv, to_sv(*cp), "'"sv);
- }
- else if (!is_eof())
- {
- if (digit_count == max_digits && is_decimal_digit(*cp))
- set_error_and_return_default("fractional component exceeds maximum precision of "sv, max_digits);
- else if (!part_of_datetime && !is_value_terminator(*cp))
- set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
- }
- uint32_t value = 0u;
- uint32_t place = 1u;
- for (auto i = impl::min<size_t>(digit_count, 9u); i-- > 0u;)
- {
- value += digits[i] * place;
- place *= 10u;
- }
- for (auto i = digit_count; i < 9u; i++) // implicit zeros
- value *= 10u;
- time.nanosecond = value;
- return time;
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- date_time parse_date_time()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
- push_parse_scope("date-time"sv);
-
- // "YYYY-MM-DD"
- auto date = parse_date(true);
- set_error_and_return_if_eof({});
-
- // ' ', 'T' or 't'
- if (!is_match(*cp, U' ', U'T', U't'))
- set_error_and_return_default("expected space, 'T' or 't', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "HH:MM:SS.FFFFFFFFF"
- auto time = parse_time(true);
- return_if_error({});
-
- // no offset
- if (is_eof() || is_value_terminator(*cp))
- return { date, time };
-
- // zero offset ('Z' or 'z')
- time_offset offset{};
- if (is_match(*cp, U'Z', U'z'))
- advance_and_return_if_error({});
-
- // explicit offset ("+/-HH:MM")
- else if (is_match(*cp, U'+', U'-'))
- {
- push_parse_scope("date-time offset"sv);
-
- // sign
- int sign = *cp == U'-' ? -1 : 1;
- advance_and_return_if_error_or_eof({});
-
- // "HH"
- int digits[2];
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
- const auto hour = digits[1] + digits[0] * 10;
- if (hour > 23)
- set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour);
- set_error_and_return_if_eof({});
-
- // ':'
- if (*cp != U':')
- set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // "MM"
- if (!consume_digit_sequence(digits, 2u))
- set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
- const auto minute = digits[1] + digits[0] * 10;
- if (minute > 59)
- set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv, minute);
- offset.minutes = static_cast<decltype(offset.minutes)>((hour * 60 + minute) * sign);
- }
-
- if (!is_eof() && !is_value_terminator(*cp))
- set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
-
- return { date, time, offset };
- }
-
- TOML_NODISCARD
- node_ptr parse_array();
-
- TOML_NODISCARD
- node_ptr parse_inline_table();
-
- TOML_NODISCARD
- node_ptr parse_value_known_prefixes()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(!is_control_character(*cp));
- TOML_ASSERT_ASSUME(*cp != U'_');
-
- switch (cp->value)
- {
- // arrays
- case U'[': return parse_array();
-
- // inline tables
- case U'{': return parse_inline_table();
-
- // floats beginning with '.'
- case U'.': return node_ptr{ new value{ parse_float() } };
-
- // strings
- case U'"': [[fallthrough]];
- case U'\'': return node_ptr{ new value{ parse_string().value } };
-
- default:
- {
- const auto cp_upper = static_cast<uint_least32_t>(cp->value) & ~0x20u;
-
- // bools
- if (cp_upper == 70u || cp_upper == 84u) // F or T
- return node_ptr{ new value{ parse_boolean() } };
-
- // inf/nan
- else if (cp_upper == 73u || cp_upper == 78u) // I or N
- return node_ptr{ new value{ parse_inf_or_nan() } };
-
- else
- return nullptr;
- }
- }
- TOML_UNREACHABLE;
- }
-
- TOML_NODISCARD
- node_ptr parse_value()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
- push_parse_scope("value"sv);
-
- const depth_counter_scope depth_counter{ nested_values };
- if TOML_UNLIKELY(nested_values > max_nested_values)
- set_error_and_return_default("exceeded maximum nested value depth of "sv,
- max_nested_values,
- " (TOML_MAX_NESTED_VALUES)"sv);
-
- // check if it begins with some control character
- // (note that this will also fail for whitespace but we're assuming we've
- // called consume_leading_whitespace() before calling parse_value())
- if TOML_UNLIKELY(is_control_character(*cp))
- set_error_and_return_default("unexpected control character"sv);
-
- // underscores at the beginning
- else if (*cp == U'_')
- set_error_and_return_default("values may not begin with underscores"sv);
-
- const auto begin_pos = cp->position;
- node_ptr val;
-
- do
- {
- TOML_ASSERT_ASSUME(!is_control_character(*cp));
- TOML_ASSERT_ASSUME(*cp != U'_');
-
- // detect the value type and parse accordingly,
- // starting with value types that can be detected
- // unambiguously from just one character.
-
- val = parse_value_known_prefixes();
- return_if_error({});
- if (val)
- break;
-
- // value types from here down require more than one character to unambiguously identify
- // so scan ahead and collect a set of value 'traits'.
- enum TOML_CLOSED_FLAGS_ENUM value_traits : int
- {
- has_nothing = 0,
- has_digits = 1,
- has_b = 1 << 1, // as second char only (0b)
- has_e = 1 << 2, // only float exponents
- has_o = 1 << 3, // as second char only (0o)
- has_p = 1 << 4, // only hexfloat exponents
- has_t = 1 << 5,
- has_x = 1 << 6, // as second or third char only (0x, -0x, +0x)
- has_z = 1 << 7,
- has_colon = 1 << 8,
- has_plus = 1 << 9,
- has_minus = 1 << 10,
- has_dot = 1 << 11,
- begins_sign = 1 << 12,
- begins_digit = 1 << 13,
- begins_zero = 1 << 14,
-
- signs_msk = has_plus | has_minus,
- bdigit_msk = has_digits | begins_digit,
- bzero_msk = bdigit_msk | begins_zero,
- };
- value_traits traits = has_nothing;
- const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; };
- const auto has_none = [&](auto t) noexcept { return (traits & t) == has_nothing; };
- const auto add_trait = [&](auto t) noexcept { traits = static_cast<value_traits>(traits | t); };
-
- // examine the first character to get the 'begins with' traits
- // (good fail-fast opportunity; all the remaining types begin with numeric digits or signs)
- if (is_decimal_digit(*cp))
- {
- add_trait(begins_digit);
- if (*cp == U'0')
- add_trait(begins_zero);
- }
- else if (is_match(*cp, U'+', U'-'))
- add_trait(begins_sign);
- else
- break;
-
- // scan the rest of the value to determine the remaining traits
- char32_t chars[utf8_buffered_reader::max_history_length];
- size_t char_count = {}, advance_count = {};
- bool eof_while_scanning = false;
- const auto scan = [&]() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS)
- {
- if (is_eof())
- return;
- TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
-
- do
- {
- if (const auto c = **cp; c != U'_')
- {
- chars[char_count++] = c;
-
- if (is_decimal_digit(c))
- add_trait(has_digits);
- else if (is_ascii_letter(c))
- {
- TOML_ASSERT_ASSUME((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z'));
- switch (static_cast<char32_t>(c | 32u))
- {
- case U'b':
- if (char_count == 2u && has_any(begins_zero))
- add_trait(has_b);
- break;
-
- case U'e':
- if (char_count > 1u
- && has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon)
- && (has_none(has_plus | has_minus) || has_any(begins_sign)))
- add_trait(has_e);
- break;
-
- case U'o':
- if (char_count == 2u && has_any(begins_zero))
- add_trait(has_o);
- break;
-
- case U'p':
- if (has_any(has_x))
- add_trait(has_p);
- break;
-
- case U'x':
- if ((char_count == 2u && has_any(begins_zero))
- || (char_count == 3u && has_any(begins_sign) && chars[1] == U'0'))
- add_trait(has_x);
- break;
-
- case U't': add_trait(has_t); break;
- case U'z': add_trait(has_z); break;
- }
- }
- else if (c <= U':')
- {
- TOML_ASSERT_ASSUME(c < U'0' || c > U'9');
- switch (c)
- {
- case U'+': add_trait(has_plus); break;
- case U'-': add_trait(has_minus); break;
- case U'.': add_trait(has_dot); break;
- case U':': add_trait(has_colon); break;
- }
- }
- }
-
- advance_and_return_if_error();
- advance_count++;
- eof_while_scanning = is_eof();
- }
- while (advance_count < (utf8_buffered_reader::max_history_length - 1u) && !is_eof()
- && !is_value_terminator(*cp));
- };
- scan();
- return_if_error({});
-
- // force further scanning if this could have been a date-time with a space instead of a T
- if (char_count == 10u //
- && (traits | begins_zero) == (bzero_msk | has_minus) //
- && chars[4] == U'-' //
- && chars[7] == U'-' //
- && !is_eof() //
- && *cp == U' ')
- {
- const auto pre_advance_count = advance_count;
- const auto pre_scan_traits = traits;
- chars[char_count++] = *cp;
- add_trait(has_t);
-
- const auto backpedal = [&]() noexcept
- {
- go_back(advance_count - pre_advance_count);
- advance_count = pre_advance_count;
- traits = pre_scan_traits;
- char_count = 10u;
- };
-
- advance_and_return_if_error({});
- advance_count++;
-
- if (is_eof() || !is_decimal_digit(*cp))
- backpedal();
- else
- {
- chars[char_count++] = *cp;
-
- advance_and_return_if_error({});
- advance_count++;
-
- scan();
- return_if_error({});
-
- if (char_count == 12u)
- backpedal();
- }
- }
-
- // set the reader back to where we started
- go_back(advance_count);
-
- // if after scanning ahead we still only have one value character,
- // the only valid value type is an integer.
- if (char_count == 1u)
- {
- if (has_any(begins_digit))
- {
- val.reset(new value{ static_cast<int64_t>(chars[0] - U'0') });
- advance(); // skip the digit
- break;
- }
-
- // anything else would be ambiguous.
- else
- set_error_and_return_default(eof_while_scanning ? "encountered end-of-file"sv
- : "could not determine value type"sv);
- }
-
- // now things that can be identified from two or more characters
- return_if_error({});
- TOML_ASSERT_ASSUME(char_count >= 2u);
-
- // do some 'fuzzy matching' where there's no ambiguity, since that allows the specific
- // typed parse functions to take over and show better diagnostics if there's an issue
- // (as opposed to the fallback "could not determine type" message)
- if (has_any(has_p))
- val.reset(new value{ parse_hex_float() });
- else if (has_any(has_x | has_o | has_b))
- {
- int64_t i;
- value_flags flags;
- if (has_any(has_x))
- {
- i = parse_integer<16>();
- flags = value_flags::format_as_hexadecimal;
- }
- else if (has_any(has_o))
- {
- i = parse_integer<8>();
- flags = value_flags::format_as_octal;
- }
- else // has_b
- {
- i = parse_integer<2>();
- flags = value_flags::format_as_binary;
- }
- return_if_error({});
-
- val.reset(new value{ i });
- val->ref_cast<int64_t>().flags(flags);
- }
- else if (has_any(has_e) || (has_any(begins_digit) && chars[1] == U'.'))
- val.reset(new value{ parse_float() });
- else if (has_any(begins_sign))
- {
- // single-digit signed integers
- if (char_count == 2u && has_any(has_digits))
- {
- val.reset(new value{ static_cast<int64_t>(chars[1] - U'0') * (chars[0] == U'-' ? -1LL : 1LL) });
- advance(); // skip the sign
- advance(); // skip the digit
- break;
- }
-
- // simple signed floats (e.g. +1.0)
- if (is_decimal_digit(chars[1]) && chars[2] == U'.')
- val.reset(new value{ parse_float() });
-
- // signed infinity or nan
- else if (is_match(chars[1], U'i', U'n', U'I', U'N'))
- val.reset(new value{ parse_inf_or_nan() });
- }
-
- return_if_error({});
- if (val)
- break;
-
- // match trait masks against what they can match exclusively.
- // all correct value parses will come out of this list, so doing this as a switch is likely to
- // be a better friend to the optimizer on the success path (failure path can be slow but that
- // doesn't matter much).
- switch (unwrap_enum(traits))
- {
- // binary integers
- // 0b10
- case bzero_msk | has_b:
- val.reset(new value{ parse_integer<2>() });
- val->ref_cast<int64_t>().flags(value_flags::format_as_binary);
- break;
-
- // octal integers
- // 0o10
- case bzero_msk | has_o:
- val.reset(new value{ parse_integer<8>() });
- val->ref_cast<int64_t>().flags(value_flags::format_as_octal);
- break;
-
- // decimal integers
- // 00
- // 10
- // +10
- // -10
- case bzero_msk: [[fallthrough]];
- case bdigit_msk: [[fallthrough]];
- case begins_sign | has_digits | has_minus: [[fallthrough]];
- case begins_sign | has_digits | has_plus:
- {
- // if the value was so long we exhausted the history buffer it's reasonable to assume
- // there was more and the value's actual type is impossible to identify without making the
- // buffer bigger (since it could have actually been a float), so emit an error.
- //
- // (this will likely only come up during fuzzing and similar scenarios)
- static constexpr size_t max_numeric_value_length =
- utf8_buffered_reader::max_history_length - 2u;
- if TOML_UNLIKELY(!eof_while_scanning && advance_count > max_numeric_value_length)
- set_error_and_return_default("numeric value too long to identify type - cannot exceed "sv,
- max_numeric_value_length,
- " characters"sv);
-
- val.reset(new value{ parse_integer<10>() });
- break;
- }
-
- // hexadecimal integers
- // 0x10
- case bzero_msk | has_x:
- val.reset(new value{ parse_integer<16>() });
- val->ref_cast<int64_t>().flags(value_flags::format_as_hexadecimal);
- break;
-
- // decimal floats
- // 0e1
- // 0e-1
- // 0e+1
- // 0.0
- // 0.0e1
- // 0.0e-1
- // 0.0e+1
- case bzero_msk | has_e: [[fallthrough]];
- case bzero_msk | has_e | has_minus: [[fallthrough]];
- case bzero_msk | has_e | has_plus: [[fallthrough]];
- case bzero_msk | has_dot: [[fallthrough]];
- case bzero_msk | has_dot | has_e: [[fallthrough]];
- case bzero_msk | has_dot | has_e | has_minus: [[fallthrough]];
- case bzero_msk | has_dot | has_e | has_plus: [[fallthrough]];
- // 1e1
- // 1e-1
- // 1e+1
- // 1.0
- // 1.0e1
- // 1.0e-1
- // 1.0e+1
- case bdigit_msk | has_e: [[fallthrough]];
- case bdigit_msk | has_e | has_minus: [[fallthrough]];
- case bdigit_msk | has_e | has_plus: [[fallthrough]];
- case bdigit_msk | has_dot: [[fallthrough]];
- case bdigit_msk | has_dot | has_e: [[fallthrough]];
- case bdigit_msk | has_dot | has_e | has_minus: [[fallthrough]];
- case bdigit_msk | has_dot | has_e | has_plus: [[fallthrough]];
- // +1e1
- // +1.0
- // +1.0e1
- // +1.0e+1
- // +1.0e-1
- // -1.0e+1
- case begins_sign | has_digits | has_e | has_plus: [[fallthrough]];
- case begins_sign | has_digits | has_dot | has_plus: [[fallthrough]];
- case begins_sign | has_digits | has_dot | has_e | has_plus: [[fallthrough]];
- case begins_sign | has_digits | has_dot | has_e | signs_msk: [[fallthrough]];
- // -1e1
- // -1e+1
- // +1e-1
- // -1.0
- // -1.0e1
- // -1.0e-1
- case begins_sign | has_digits | has_e | has_minus: [[fallthrough]];
- case begins_sign | has_digits | has_e | signs_msk: [[fallthrough]];
- case begins_sign | has_digits | has_dot | has_minus: [[fallthrough]];
- case begins_sign | has_digits | has_dot | has_e | has_minus:
- val.reset(new value{ parse_float() });
- break;
-
- // hexadecimal floats
- // 0x10p0
- // 0x10p-0
- // 0x10p+0
- case bzero_msk | has_x | has_p: [[fallthrough]];
- case bzero_msk | has_x | has_p | has_minus: [[fallthrough]];
- case bzero_msk | has_x | has_p | has_plus: [[fallthrough]];
- // -0x10p0
- // -0x10p-0
- // +0x10p0
- // +0x10p+0
- // -0x10p+0
- // +0x10p-0
- case begins_sign | has_digits | has_x | has_p | has_minus: [[fallthrough]];
- case begins_sign | has_digits | has_x | has_p | has_plus: [[fallthrough]];
- case begins_sign | has_digits | has_x | has_p | signs_msk: [[fallthrough]];
- // 0x10.1p0
- // 0x10.1p-0
- // 0x10.1p+0
- case bzero_msk | has_x | has_dot | has_p: [[fallthrough]];
- case bzero_msk | has_x | has_dot | has_p | has_minus: [[fallthrough]];
- case bzero_msk | has_x | has_dot | has_p | has_plus: [[fallthrough]];
- // -0x10.1p0
- // -0x10.1p-0
- // +0x10.1p0
- // +0x10.1p+0
- // -0x10.1p+0
- // +0x10.1p-0
- case begins_sign | has_digits | has_x | has_dot | has_p | has_minus: [[fallthrough]];
- case begins_sign | has_digits | has_x | has_dot | has_p | has_plus: [[fallthrough]];
- case begins_sign | has_digits | has_x | has_dot | has_p | signs_msk:
- val.reset(new value{ parse_hex_float() });
- break;
-
- // times
- // HH:MM
- // HH:MM:SS
- // HH:MM:SS.FFFFFF
- case bzero_msk | has_colon: [[fallthrough]];
- case bzero_msk | has_colon | has_dot: [[fallthrough]];
- case bdigit_msk | has_colon: [[fallthrough]];
- case bdigit_msk | has_colon | has_dot: val.reset(new value{ parse_time() }); break;
-
- // local dates
- // YYYY-MM-DD
- case bzero_msk | has_minus: [[fallthrough]];
- case bdigit_msk | has_minus: val.reset(new value{ parse_date() }); break;
-
- // date-times
- // YYYY-MM-DDTHH:MM
- // YYYY-MM-DDTHH:MM-HH:MM
- // YYYY-MM-DDTHH:MM+HH:MM
- // YYYY-MM-DD HH:MM
- // YYYY-MM-DD HH:MM-HH:MM
- // YYYY-MM-DD HH:MM+HH:MM
- // YYYY-MM-DDTHH:MM:SS
- // YYYY-MM-DDTHH:MM:SS-HH:MM
- // YYYY-MM-DDTHH:MM:SS+HH:MM
- // YYYY-MM-DD HH:MM:SS
- // YYYY-MM-DD HH:MM:SS-HH:MM
- // YYYY-MM-DD HH:MM:SS+HH:MM
- case bzero_msk | has_minus | has_colon | has_t: [[fallthrough]];
- case bzero_msk | signs_msk | has_colon | has_t: [[fallthrough]];
- case bdigit_msk | has_minus | has_colon | has_t: [[fallthrough]];
- case bdigit_msk | signs_msk | has_colon | has_t: [[fallthrough]];
- // YYYY-MM-DDTHH:MM:SS.FFFFFF
- // YYYY-MM-DDTHH:MM:SS.FFFFFF-HH:MM
- // YYYY-MM-DDTHH:MM:SS.FFFFFF+HH:MM
- // YYYY-MM-DD HH:MM:SS.FFFFFF
- // YYYY-MM-DD HH:MM:SS.FFFFFF-HH:MM
- // YYYY-MM-DD HH:MM:SS.FFFFFF+HH:MM
- case bzero_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]];
- case bzero_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]];
- case bdigit_msk | has_minus | has_colon | has_dot | has_t: [[fallthrough]];
- case bdigit_msk | signs_msk | has_colon | has_dot | has_t: [[fallthrough]];
- // YYYY-MM-DDTHH:MMZ
- // YYYY-MM-DD HH:MMZ
- // YYYY-MM-DDTHH:MM:SSZ
- // YYYY-MM-DD HH:MM:SSZ
- // YYYY-MM-DDTHH:MM:SS.FFFFFFZ
- // YYYY-MM-DD HH:MM:SS.FFFFFFZ
- case bzero_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]];
- case bzero_msk | has_minus | has_colon | has_dot | has_z | has_t: [[fallthrough]];
- case bdigit_msk | has_minus | has_colon | has_z | has_t: [[fallthrough]];
- case bdigit_msk | has_minus | has_colon | has_dot | has_z | has_t:
- val.reset(new value{ parse_date_time() });
- break;
- }
- }
- while (false);
-
- if (!val)
- {
- set_error_at(begin_pos, "could not determine value type"sv);
- return_after_error({});
- }
-
- val->source_ = { begin_pos, current_position(1), reader.source_path() };
- return val;
- }
-
- TOML_NEVER_INLINE
- bool parse_key()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_bare_key_character(*cp) || is_string_delimiter(*cp));
- push_parse_scope("key"sv);
-
- key_buffer.clear();
- recording_whitespace = false;
-
- while (!is_error())
- {
- std::string_view key_segment;
- const auto key_begin = current_position();
-
- // bare_key_segment
- if (is_bare_key_character(*cp))
- key_segment = parse_bare_key_segment();
-
- // "quoted key segment"
- else if (is_string_delimiter(*cp))
- {
- const auto begin_pos = cp->position;
-
- recording_whitespace = true;
- parsed_string str = parse_string();
- recording_whitespace = false;
- return_if_error({});
-
- if (str.was_multi_line)
- {
- set_error_at(begin_pos,
- "multi-line strings are prohibited in "sv,
- key_buffer.empty() ? ""sv : "dotted "sv,
- "keys"sv);
- return_after_error({});
- }
- else
- key_segment = str.value;
- }
-
- // ???
- else
- set_error_and_return_default("expected bare key starting character or string delimiter, saw '"sv,
- to_sv(*cp),
- "'"sv);
-
- const auto key_end = current_position();
-
- // whitespace following the key segment
- consume_leading_whitespace();
-
- // store segment
- key_buffer.push_back(key_segment, key_begin, key_end);
-
- // eof or no more key to come
- if (is_eof() || *cp != U'.')
- break;
-
- // was a dotted key - go around again
- advance_and_return_if_error_or_eof({});
- consume_leading_whitespace();
- set_error_and_return_if_eof({});
- }
- return_if_error({});
-
- return true;
- }
-
- TOML_NODISCARD
- key make_key(size_t segment_index) const
- {
- TOML_ASSERT(key_buffer.size() > segment_index);
-
- return key{
- key_buffer[segment_index],
- source_region{ key_buffer.starts[segment_index], key_buffer.ends[segment_index], root.source().path }
- };
- }
-
- TOML_NODISCARD
- TOML_NEVER_INLINE
- table* parse_table_header()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(*cp == U'[');
- push_parse_scope("table header"sv);
-
- const source_position header_begin_pos = cp->position;
- source_position header_end_pos;
- bool is_arr = false;
-
- // parse header
- {
- // skip first '['
- advance_and_return_if_error_or_eof({});
-
- // skip past any whitespace that followed the '['
- const bool had_leading_whitespace = consume_leading_whitespace();
- set_error_and_return_if_eof({});
-
- // skip second '[' (if present)
- if (*cp == U'[')
- {
- if (had_leading_whitespace)
- set_error_and_return_default(
- "[[array-of-table]] brackets must be contiguous (i.e. [ [ this ] ] is prohibited)"sv);
-
- is_arr = true;
- advance_and_return_if_error_or_eof({});
-
- // skip past any whitespace that followed the '['
- consume_leading_whitespace();
- set_error_and_return_if_eof({});
- }
-
- // check for a premature closing ']'
- if (*cp == U']')
- set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv);
-
- // get the actual key
- start_recording();
- parse_key();
- stop_recording(1u);
- return_if_error({});
-
- // skip past any whitespace that followed the key
- consume_leading_whitespace();
- return_if_error({});
- set_error_and_return_if_eof({});
-
- // consume the closing ']'
- if (*cp != U']')
- set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
- if (is_arr)
- {
- advance_and_return_if_error_or_eof({});
- if (*cp != U']')
- set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
- }
- advance_and_return_if_error({});
- header_end_pos = current_position(1);
-
- // handle the rest of the line after the header
- consume_leading_whitespace();
- if (!is_eof() && !consume_comment() && !consume_line_break())
- set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
- }
- TOML_ASSERT(!key_buffer.empty());
-
- // check if each parent is a table/table array, or can be created implicitly as a table.
- table* parent = &root;
- for (size_t i = 0, e = key_buffer.size() - 1u; i < e; i++)
- {
- const std::string_view segment = key_buffer[i];
- auto pit = parent->lower_bound(segment);
-
- // parent already existed
- if (pit != parent->end() && pit->first == segment)
- {
- node& p = pit->second;
-
- if (auto tbl = p.as_table())
- {
- // adding to closed inline tables is illegal
- if (tbl->is_inline() && !impl::find(open_inline_tables.begin(), open_inline_tables.end(), tbl))
- set_error_and_return_default("cannot insert '"sv,
- to_sv(recording_buffer),
- "' into existing inline table"sv);
-
- parent = tbl;
- }
- else if (auto arr = p.as_array(); arr && impl::find(table_arrays.begin(), table_arrays.end(), arr))
- {
- // table arrays are a special case;
- // the spec dictates we select the most recently declared element in the array.
- TOML_ASSERT(!arr->empty());
- TOML_ASSERT(arr->back().is_table());
- parent = &arr->back().ref_cast<table>();
- }
- else
- {
- if (!is_arr && p.type() == node_type::table)
- set_error_and_return_default("cannot redefine existing table '"sv,
- to_sv(recording_buffer),
- "'"sv);
- else
- set_error_and_return_default("cannot redefine existing "sv,
- to_sv(p.type()),
- " '"sv,
- to_sv(recording_buffer),
- "' as "sv,
- is_arr ? "array-of-tables"sv : "table"sv);
- }
- }
-
- // need to create a new implicit table
- else
- {
- pit = parent->emplace_hint<table>(pit, make_key(i));
- table& p = pit->second.ref_cast<table>();
- p.source_ = { header_begin_pos, header_end_pos, reader.source_path() };
-
- implicit_tables.push_back(&p);
- parent = &p;
- }
- }
-
- const auto last_segment = key_buffer.back();
- auto it = parent->lower_bound(last_segment);
-
- // if there was already a matching node some sanity checking is necessary;
- // this is ok if we're making an array and the existing element is already an array (new element)
- // or if we're making a table and the existing element is an implicitly-created table (promote it),
- // otherwise this is a redefinition error.
- if (it != parent->end() && it->first == last_segment)
- {
- node& matching_node = it->second;
- if (auto arr = matching_node.as_array();
- is_arr && arr && impl::find(table_arrays.begin(), table_arrays.end(), arr))
- {
- table& tbl = arr->emplace_back<table>();
- tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() };
- return &tbl;
- }
-
- else if (auto tbl = matching_node.as_table(); !is_arr && tbl && !implicit_tables.empty())
- {
- if (auto found = impl::find(implicit_tables.begin(), implicit_tables.end(), tbl); found)
- {
- bool ok = true;
- if (!tbl->empty())
- {
- for (auto& [_, child] : *tbl)
- {
- if (!child.is_table() && !child.is_array_of_tables())
- {
- ok = false;
- break;
- }
- }
- }
-
- if (ok)
- {
- implicit_tables.erase(implicit_tables.cbegin() + (found - implicit_tables.data()));
- tbl->source_.begin = header_begin_pos;
- tbl->source_.end = header_end_pos;
- return tbl;
- }
- }
- }
-
- // if we get here it's a redefinition error.
- if (!is_arr && matching_node.type() == node_type::table)
- {
- set_error_at(header_begin_pos,
- "cannot redefine existing table '"sv,
- to_sv(recording_buffer),
- "'"sv);
- return_after_error({});
- }
- else
- {
- set_error_at(header_begin_pos,
- "cannot redefine existing "sv,
- to_sv(matching_node.type()),
- " '"sv,
- to_sv(recording_buffer),
- "' as "sv,
- is_arr ? "array-of-tables"sv : "table"sv);
- return_after_error({});
- }
- }
-
- // there was no matching node, sweet - we can freely instantiate a new table/table array.
- else
- {
- auto last_key = make_key(key_buffer.size() - 1u);
-
- // if it's an array we need to make the array and it's first table element,
- // set the starting regions, and return the table element
- if (is_arr)
- {
- it = parent->emplace_hint<array>(it, std::move(last_key));
- array& tbl_arr = it->second.ref_cast<array>();
- table_arrays.push_back(&tbl_arr);
- tbl_arr.source_ = { header_begin_pos, header_end_pos, reader.source_path() };
-
- table& tbl = tbl_arr.emplace_back<table>();
- tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() };
- return &tbl;
- }
-
- // otherwise we're just making a table
- else
- {
- it = parent->emplace_hint<table>(it, std::move(last_key));
- table& tbl = it->second.ref_cast<table>();
- tbl.source_ = { header_begin_pos, header_end_pos, reader.source_path() };
- return &tbl;
- }
- }
- }
-
- TOML_NEVER_INLINE
- bool parse_key_value_pair_and_insert(table* tbl)
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(is_string_delimiter(*cp) || is_bare_key_character(*cp));
- push_parse_scope("key-value pair"sv);
-
- // read the key into the key buffer
- start_recording();
- parse_key();
- stop_recording(1u);
- return_if_error({});
- TOML_ASSERT(key_buffer.size() >= 1u);
-
- // skip past any whitespace that followed the key
- consume_leading_whitespace();
- set_error_and_return_if_eof({});
-
- // '='
- if (*cp != U'=')
- set_error_and_return_default("expected '=', saw '"sv, to_sv(*cp), "'"sv);
- advance_and_return_if_error_or_eof({});
-
- // skip past any whitespace that followed the '='
- consume_leading_whitespace();
- return_if_error({});
- set_error_and_return_if_eof({});
-
- // check that the next character could actually be a value
- if (is_value_terminator(*cp))
- set_error_and_return_default("expected value, saw '"sv, to_sv(*cp), "'"sv);
-
- // if it's a dotted kvp we need to spawn the parent sub-tables if necessary,
- // and set the target table to the second-to-last one in the chain
- if (key_buffer.size() > 1u)
- {
- for (size_t i = 0; i < key_buffer.size() - 1u; i++)
- {
- const std::string_view segment = key_buffer[i];
- auto pit = tbl->lower_bound(segment);
-
- // parent already existed
- if (pit != tbl->end() && pit->first == segment)
- {
- table* p = pit->second.as_table();
-
- // redefinition
- if TOML_UNLIKELY(!p
- || !(impl::find(dotted_key_tables.begin(), dotted_key_tables.end(), p)
- || impl::find(implicit_tables.begin(), implicit_tables.end(), p)))
- {
- set_error_at(key_buffer.starts[i],
- "cannot redefine existing "sv,
- to_sv(pit->second.type()),
- " as dotted key-value pair"sv);
- return_after_error({});
- }
-
- tbl = p;
- }
-
- // need to create a new implicit table
- else
- {
- pit = tbl->emplace_hint<table>(pit, make_key(i));
- table& p = pit->second.ref_cast<table>();
- p.source_ = pit->first.source();
-
- dotted_key_tables.push_back(&p);
- tbl = &p;
- }
- }
- }
-
- // ensure this isn't a redefinition
- const std::string_view last_segment = key_buffer.back();
- auto it = tbl->lower_bound(last_segment);
- if (it != tbl->end() && it->first == last_segment)
- {
- set_error("cannot redefine existing "sv,
- to_sv(it->second.type()),
- " '"sv,
- to_sv(recording_buffer),
- "'"sv);
- return_after_error({});
- }
-
- // create the key first since the key buffer will likely get overwritten during value parsing (inline
- // tables)
- auto last_key = make_key(key_buffer.size() - 1u);
-
- // now we can actually parse the value
- node_ptr val = parse_value();
- return_if_error({});
-
- tbl->emplace_hint<node_ptr>(it, std::move(last_key), std::move(val));
- return true;
- }
-
- void parse_document()
- {
- assert_not_error();
- assert_not_eof();
- push_parse_scope("root table"sv);
-
- table* current_table = &root;
-
- do
- {
- return_if_error();
-
- // leading whitespace, line endings, comments
- if (consume_leading_whitespace() || consume_line_break() || consume_comment())
- continue;
- return_if_error();
-
- // [tables]
- // [[table array]]
- if (*cp == U'[')
- current_table = parse_table_header();
-
- // bare_keys
- // dotted.keys
- // "quoted keys"
- else if (is_bare_key_character(*cp) || is_string_delimiter(*cp))
- {
- push_parse_scope("key-value pair"sv);
-
- parse_key_value_pair_and_insert(current_table);
-
- // handle the rest of the line after the kvp
- // (this is not done in parse_key_value_pair() because that is also used for inline tables)
- consume_leading_whitespace();
- return_if_error();
- if (!is_eof() && !consume_comment() && !consume_line_break())
- set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
- }
-
- else // ??
- set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
- }
- while (!is_eof());
-
- auto eof_pos = current_position(1);
- root.source_.end = eof_pos;
- if (current_table && current_table != &root && current_table->source_.end <= current_table->source_.begin)
- current_table->source_.end = eof_pos;
- }
-
- static void update_region_ends(node& nde) noexcept
- {
- const auto type = nde.type();
- if (type > node_type::array)
- return;
-
- if (type == node_type::table)
- {
- auto& tbl = nde.ref_cast<table>();
- if (tbl.is_inline()) // inline tables (and all their inline descendants) are already correctly
- // terminated
- return;
-
- auto end = nde.source_.end;
- for (auto&& [k, v] : tbl)
- {
- TOML_UNUSED(k);
- update_region_ends(v);
- if (end < v.source_.end)
- end = v.source_.end;
- }
- }
- else // arrays
- {
- auto& arr = nde.ref_cast<array>();
- auto end = nde.source_.end;
- for (auto&& v : arr)
- {
- update_region_ends(v);
- if (end < v.source_.end)
- end = v.source_.end;
- }
- nde.source_.end = end;
- }
- }
-
- public:
- parser(utf8_reader_interface&& reader_) //
- : reader{ reader_ }
- {
- root.source_ = { prev_pos, prev_pos, reader.source_path() };
-
- if (!reader.peek_eof())
- {
- cp = reader.read_next();
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ double parse_hex_float() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_match(*cp, U'0', U'+', U'-'));
+ push_parse_scope("hexadecimal floating-point"sv);
+
+#if TOML_LANG_UNRELEASED // toml/issues/562 (hexfloats)
+
+ // sign
+ const int sign = *cp == U'-' ? -1 : 1;
+ if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
+
+ // '0'
+ if (*cp != U'0') set_error_and_return_default(" expected '0', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // 'x' or 'X'
+ if (!is_match(*cp, U'x', U'X'))
+ set_error_and_return_default("expected 'x' or 'X', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // <HEX DIGITS> ([.]<HEX DIGITS>)? [pP] [+-]? <DEC DIGITS>
+
+ // consume value fragments
+ struct fragment {
+ char chars[24];
+ size_t length;
+ double value;
+ };
+ fragment fragments[] = {
+ {}, // mantissa, whole part
+ {}, // mantissa, fractional part
+ {} // exponent
+ };
+ fragment* current_fragment = fragments;
+ const utf8_codepoint* prev = {};
+ int exponent_sign = 1;
+ while (!is_eof() && !is_value_terminator(*cp)) {
+ if (*cp == U'_') {
+ if (!prev || !is_hexadecimal_digit(*prev))
+ set_error_and_return_default("underscores may only follow digits"sv);
+
+ prev = cp;
+ advance_and_return_if_error_or_eof({});
+ continue;
+ } else if (prev && *prev == U'_' && !is_hexadecimal_digit(*cp))
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ else if (*cp == U'.') {
+ // 0x10.0p-.0 (exponent cannot have '.')
+ if (current_fragment == fragments + 2)
+ set_error_and_return_default("expected exponent digit or sign, saw '.'"sv);
+
+ // 0x10.0.p-0 (multiple '.')
+ else if (current_fragment == fragments + 1)
+ set_error_and_return_default("expected hexadecimal digit or exponent, saw '.'"sv);
+
+ else
+ current_fragment++;
+ } else if (is_match(*cp, U'p', U'P')) {
+ // 0x10.0pp-0 (multiple 'p')
+ if (current_fragment == fragments + 2)
+ set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp),
+ "'"sv);
+
+ // 0x.p-0 (mantissa is just '.')
+ else if (fragments[0].length == 0u && fragments[1].length == 0u)
+ set_error_and_return_default("expected hexadecimal digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ else
+ current_fragment = fragments + 2;
+ } else if (is_match(*cp, U'+', U'-')) {
+ // 0x-10.0p-0 (sign in mantissa)
+ if (current_fragment != fragments + 2)
+ set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp),
+ "'"sv);
+
+ // 0x10.0p0- (misplaced exponent sign)
+ else if (!is_match(*prev, U'p', U'P'))
+ set_error_and_return_default("expected exponent digit, saw '"sv, to_sv(*cp), "'"sv);
+
+ else
+ exponent_sign = *cp == U'-' ? -1 : 1;
+ } else if (current_fragment < fragments + 2 && !is_hexadecimal_digit(*cp))
+ set_error_and_return_default("expected hexadecimal digit or '.', saw '"sv, to_sv(*cp),
+ "'"sv);
+ else if (current_fragment == fragments + 2 && !is_decimal_digit(*cp))
+ set_error_and_return_default("expected exponent digit or sign, saw '"sv, to_sv(*cp),
+ "'"sv);
+ else if (current_fragment->length == sizeof(fragment::chars))
+ set_error_and_return_default("fragment exceeeds maximum length of "sv,
+ sizeof(fragment::chars), " characters"sv);
+ else
+ current_fragment->chars[current_fragment->length++] = static_cast<char>(cp->bytes[0]);
+
+ prev = cp;
+ advance_and_return_if_error({});
+ }
+
+ // sanity-check ending state
+ if (current_fragment != fragments + 2 || current_fragment->length == 0u) {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("missing exponent"sv);
+ } else if (prev && *prev == U'_') {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ }
+
+ // calculate values for the three fragments
+ for (int fragment_idx = 0; fragment_idx < 3; fragment_idx++) {
+ auto& f = fragments[fragment_idx];
+ const uint32_t base = fragment_idx == 2 ? 10u : 16u;
+
+ // left-trim zeroes
+ const char* c = f.chars;
+ size_t sig = {};
+ while (f.length && *c == '0') {
+ f.length--;
+ c++;
+ sig++;
+ }
+ if (!f.length) continue;
+
+ // calculate value
+ auto place = 1u;
+ for (size_t i = 0; i < f.length - 1u; i++) place *= base;
+ uint32_t val{};
+ while (place) {
+ if (base == 16)
+ val += place * hex_to_dec(*c);
+ else
+ val += place * static_cast<uint32_t>(*c - '0');
+ if (fragment_idx == 1) sig++;
+ c++;
+ place /= base;
+ }
+ f.value = static_cast<double>(val);
+
+ // shift the fractional part
+ if (fragment_idx == 1) {
+ while (sig--) f.value /= base;
+ }
+ }
+
+ return (fragments[0].value + fragments[1].value) *
+ pow(2.0, fragments[2].value * exponent_sign) * sign;
+
+#else // !TOML_LANG_UNRELEASED
+
+ set_error_and_return_default(
+ "hexadecimal floating-point values are not supported "
+ "in TOML 1.0.0 and earlier"sv);
+
+#endif // !TOML_LANG_UNRELEASED
+ }
+
+ template <uint64_t base>
+ TOML_NODISCARD TOML_NEVER_INLINE int64_t parse_integer() {
+ return_if_error({});
+ assert_not_eof();
+ using traits = parse_integer_traits<base>;
+ push_parse_scope(traits::scope_qualifier);
+
+ [[maybe_unused]] int64_t sign = 1;
+ if constexpr (traits::is_signed) {
+ sign = *cp == U'-' ? -1 : 1;
+ if (is_match(*cp, U'+', U'-')) advance_and_return_if_error_or_eof({});
+ }
+
+ if constexpr (base == 10) {
+ if (!traits::is_digit(*cp))
+ set_error_and_return_default("expected expected digit or sign, saw '"sv, to_sv(*cp),
+ "'"sv);
+ } else {
+ // '0'
+ if (*cp != U'0') set_error_and_return_default("expected '0', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // 'b', 'o', 'x'
+ if (*cp != traits::prefix_codepoint)
+ set_error_and_return_default("expected '"sv, traits::prefix, "', saw '"sv, to_sv(*cp),
+ "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ if (!traits::is_digit(*cp))
+ set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
+ }
+
+ // consume digits
+ char digits[utf8_buffered_reader::max_history_length];
+ size_t length = {};
+ const utf8_codepoint* prev = {};
+ while (!is_eof() && !is_value_terminator(*cp)) {
+ if (*cp == U'_') {
+ if (!prev || !traits::is_digit(*prev))
+ set_error_and_return_default("underscores may only follow digits"sv);
+
+ prev = cp;
+ advance_and_return_if_error_or_eof({});
+ continue;
+ } else if TOML_UNLIKELY (prev && *prev == U'_' && !traits::is_digit(*cp))
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ else if TOML_UNLIKELY (!traits::is_digit(*cp))
+ set_error_and_return_default("expected digit, saw '"sv, to_sv(*cp), "'"sv);
+ else if TOML_UNLIKELY (length == sizeof(digits))
+ set_error_and_return_default("exceeds length limit of "sv, sizeof(digits), " digits"sv);
+ else
+ digits[length++] = static_cast<char>(cp->bytes[0]);
+
+ prev = cp;
+ advance_and_return_if_error({});
+ }
+
+ // sanity check ending state
+ if (prev && *prev == U'_') {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("underscores must be followed by digits"sv);
+ }
+
+ // single digits can be converted trivially
+ if (length == 1u) {
+ int64_t result;
+
+ if constexpr (base == 16)
+ result = static_cast<int64_t>(hex_to_dec(digits[0]));
+ else
+ result = static_cast<int64_t>(digits[0] - '0');
+
+ if constexpr (traits::is_signed) result *= sign;
+
+ return result;
+ }
+
+ // bin, oct and hex allow leading zeroes so trim them first
+ const char* end = digits + length;
+ const char* msd = digits;
+ if constexpr (base != 10) {
+ while (msd < end && *msd == '0') msd++;
+ if (msd == end) return 0ll;
+ }
+
+ // decimal integers do not allow leading zeroes
+ else {
+ if TOML_UNLIKELY (digits[0] == '0')
+ set_error_and_return_default("leading zeroes are prohibited"sv);
+ }
+
+ // range check
+ if TOML_UNLIKELY (static_cast<size_t>(end - msd) > traits::max_digits)
+ set_error_and_return_default("'"sv, traits::full_prefix, std::string_view{digits, length},
+ "' is not representable in 64 bits"sv);
+
+ // do the thing
+ {
+ uint64_t result = {};
+ {
+ uint64_t power = 1;
+ while (--end >= msd) {
+ if constexpr (base == 16)
+ result += power * hex_to_dec(*end);
+ else
+ result += power * static_cast<uint64_t>(*end - '0');
+
+ power *= base;
+ }
+ }
+
+ // range check
+ static constexpr auto i64_max =
+ static_cast<uint64_t>((std::numeric_limits<int64_t>::max)());
+ if TOML_UNLIKELY (result > i64_max + (sign < 0 ? 1u : 0u))
+ set_error_and_return_default("'"sv, traits::full_prefix, std::string_view{digits, length},
+ "' is not representable in 64 bits"sv);
+
+ if constexpr (traits::is_signed) {
+ // avoid signed multiply UB when parsing INT64_MIN
+ if TOML_UNLIKELY (sign < 0 && result == i64_max + 1u)
+ return (std::numeric_limits<int64_t>::min)();
+
+ return static_cast<int64_t>(result) * sign;
+ } else
+ return static_cast<int64_t>(result);
+ }
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ date parse_date(bool part_of_datetime = false) {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
+ push_parse_scope("date"sv);
+
+ // "YYYY"
+ uint32_t digits[4];
+ if (!consume_digit_sequence(digits, 4u))
+ set_error_and_return_default("expected 4-digit year, saw '"sv, to_sv(cp), "'"sv);
+ const auto year = digits[3] + digits[2] * 10u + digits[1] * 100u + digits[0] * 1000u;
+ const auto is_leap_year = (year % 4u == 0u) && ((year % 100u != 0u) || (year % 400u == 0u));
+ set_error_and_return_if_eof({});
+
+ // '-'
+ if (*cp != U'-') set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "MM"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit month, saw '"sv, to_sv(cp), "'"sv);
+ const auto month = digits[1] + digits[0] * 10u;
+ if (month == 0u || month > 12u)
+ set_error_and_return_default("expected month between 1 and 12 (inclusive), saw "sv, month);
+ const auto max_days_in_month =
+ month == 2u ? (is_leap_year ? 29u : 28u)
+ : (month == 4u || month == 6u || month == 9u || month == 11u ? 30u : 31u);
+ set_error_and_return_if_eof({});
+
+ // '-'
+ if (*cp != U'-') set_error_and_return_default("expected '-', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "DD"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit day, saw '"sv, to_sv(cp), "'"sv);
+ const auto day = digits[1] + digits[0] * 10u;
+ if (day == 0u || day > max_days_in_month)
+ set_error_and_return_default("expected day between 1 and "sv, max_days_in_month,
+ " (inclusive), saw "sv, day);
+
+ if (!part_of_datetime && !is_eof() && !is_value_terminator(*cp))
+ set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
+
+ return {year, month, day};
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ time parse_time(bool part_of_datetime = false) {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
+ push_parse_scope("time"sv);
+
+ static constexpr size_t max_digits = 64; // far more than necessary but needed to allow
+ // fractional millisecond truncation per the spec
+ uint32_t digits[max_digits];
+
+ // "HH"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
+ const auto hour = digits[1] + digits[0] * 10u;
+ if (hour > 23u)
+ set_error_and_return_default("expected hour between 0 to 59 (inclusive), saw "sv, hour);
+ set_error_and_return_if_eof({});
+
+ // ':'
+ if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "MM"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
+ const auto minute = digits[1] + digits[0] * 10u;
+ if (minute > 59u)
+ set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv,
+ minute);
+ auto time = toml::time{hour, minute};
+
+ // ':'
+ if constexpr (TOML_LANG_UNRELEASED) // toml/issues/671 (allow omission of seconds)
+ {
+ if (is_eof() || is_value_terminator(*cp) ||
+ (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
+ return time;
+ } else
+ set_error_and_return_if_eof({});
+ if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "SS"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit second, saw '"sv, to_sv(cp), "'"sv);
+ const auto second = digits[1] + digits[0] * 10u;
+ if (second > 59u)
+ set_error_and_return_default("expected second between 0 and 59 (inclusive), saw "sv,
+ second);
+ time.second = static_cast<decltype(time.second)>(second);
+
+ // '.' (early-exiting is allowed; fractional is optional)
+ if (is_eof() || is_value_terminator(*cp) ||
+ (part_of_datetime && is_match(*cp, U'+', U'-', U'Z', U'z')))
+ return time;
+ if (*cp != U'.') set_error_and_return_default("expected '.', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "FFFFFFFFF"
+ size_t digit_count = consume_variable_length_digit_sequence(digits, max_digits);
+ if (!digit_count) {
+ set_error_and_return_if_eof({});
+ set_error_and_return_default("expected fractional digits, saw '"sv, to_sv(*cp), "'"sv);
+ } else if (!is_eof()) {
+ if (digit_count == max_digits && is_decimal_digit(*cp))
+ set_error_and_return_default("fractional component exceeds maximum precision of "sv,
+ max_digits);
+ else if (!part_of_datetime && !is_value_terminator(*cp))
+ set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
+ }
+ uint32_t value = 0u;
+ uint32_t place = 1u;
+ for (auto i = impl::min<size_t>(digit_count, 9u); i-- > 0u;) {
+ value += digits[i] * place;
+ place *= 10u;
+ }
+ for (auto i = digit_count; i < 9u; i++) // implicit zeros
+ value *= 10u;
+ time.nanosecond = value;
+ return time;
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ date_time parse_date_time() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_decimal_digit(*cp));
+ push_parse_scope("date-time"sv);
+
+ // "YYYY-MM-DD"
+ auto date = parse_date(true);
+ set_error_and_return_if_eof({});
+
+ // ' ', 'T' or 't'
+ if (!is_match(*cp, U' ', U'T', U't'))
+ set_error_and_return_default("expected space, 'T' or 't', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "HH:MM:SS.FFFFFFFFF"
+ auto time = parse_time(true);
+ return_if_error({});
+
+ // no offset
+ if (is_eof() || is_value_terminator(*cp)) return {date, time};
+
+ // zero offset ('Z' or 'z')
+ time_offset offset{};
+ if (is_match(*cp, U'Z', U'z')) advance_and_return_if_error({});
+
+ // explicit offset ("+/-HH:MM")
+ else if (is_match(*cp, U'+', U'-')) {
+ push_parse_scope("date-time offset"sv);
+
+ // sign
+ int sign = *cp == U'-' ? -1 : 1;
+ advance_and_return_if_error_or_eof({});
+
+ // "HH"
+ int digits[2];
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit hour, saw '"sv, to_sv(cp), "'"sv);
+ const auto hour = digits[1] + digits[0] * 10;
+ if (hour > 23)
+ set_error_and_return_default("expected hour between 0 and 23 (inclusive), saw "sv, hour);
+ set_error_and_return_if_eof({});
+
+ // ':'
+ if (*cp != U':') set_error_and_return_default("expected ':', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // "MM"
+ if (!consume_digit_sequence(digits, 2u))
+ set_error_and_return_default("expected 2-digit minute, saw '"sv, to_sv(cp), "'"sv);
+ const auto minute = digits[1] + digits[0] * 10;
+ if (minute > 59)
+ set_error_and_return_default("expected minute between 0 and 59 (inclusive), saw "sv,
+ minute);
+ offset.minutes = static_cast<decltype(offset.minutes)>((hour * 60 + minute) * sign);
+ }
+
+ if (!is_eof() && !is_value_terminator(*cp))
+ set_error_and_return_default("expected value-terminator, saw '"sv, to_sv(*cp), "'"sv);
+
+ return {date, time, offset};
+ }
+
+ TOML_NODISCARD
+ node_ptr parse_array();
+
+ TOML_NODISCARD
+ node_ptr parse_inline_table();
+
+ TOML_NODISCARD
+ node_ptr parse_value_known_prefixes() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(!is_control_character(*cp));
+ TOML_ASSERT_ASSUME(*cp != U'_');
+
+ switch (cp->value) {
+ // arrays
+ case U'[':
+ return parse_array();
+
+ // inline tables
+ case U'{':
+ return parse_inline_table();
+
+ // floats beginning with '.'
+ case U'.':
+ return node_ptr{new value{parse_float()}};
+
+ // strings
+ case U'"':
+ [[fallthrough]];
+ case U'\'':
+ return node_ptr{new value{parse_string().value}};
+
+ default: {
+ const auto cp_upper = static_cast<uint_least32_t>(cp->value) & ~0x20u;
+
+ // bools
+ if (cp_upper == 70u || cp_upper == 84u) // F or T
+ return node_ptr{new value{parse_boolean()}};
+
+ // inf/nan
+ else if (cp_upper == 73u || cp_upper == 78u) // I or N
+ return node_ptr{new value{parse_inf_or_nan()}};
+
+ else
+ return nullptr;
+ }
+ }
+ TOML_UNREACHABLE;
+ }
+
+ TOML_NODISCARD
+ node_ptr parse_value() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
+ push_parse_scope("value"sv);
+
+ const depth_counter_scope depth_counter{nested_values};
+ if TOML_UNLIKELY (nested_values > max_nested_values)
+ set_error_and_return_default("exceeded maximum nested value depth of "sv, max_nested_values,
+ " (TOML_MAX_NESTED_VALUES)"sv);
+
+ // check if it begins with some control character
+ // (note that this will also fail for whitespace but we're assuming we've
+ // called consume_leading_whitespace() before calling parse_value())
+ if TOML_UNLIKELY (is_control_character(*cp))
+ set_error_and_return_default("unexpected control character"sv);
+
+ // underscores at the beginning
+ else if (*cp == U'_')
+ set_error_and_return_default("values may not begin with underscores"sv);
+
+ const auto begin_pos = cp->position;
+ node_ptr val;
+
+ do {
+ TOML_ASSERT_ASSUME(!is_control_character(*cp));
+ TOML_ASSERT_ASSUME(*cp != U'_');
+
+ // detect the value type and parse accordingly,
+ // starting with value types that can be detected
+ // unambiguously from just one character.
+
+ val = parse_value_known_prefixes();
+ return_if_error({});
+ if (val) break;
+
+ // value types from here down require more than one character to unambiguously identify
+ // so scan ahead and collect a set of value 'traits'.
+ enum TOML_CLOSED_FLAGS_ENUM value_traits : int {
+ has_nothing = 0,
+ has_digits = 1,
+ has_b = 1 << 1, // as second char only (0b)
+ has_e = 1 << 2, // only float exponents
+ has_o = 1 << 3, // as second char only (0o)
+ has_p = 1 << 4, // only hexfloat exponents
+ has_t = 1 << 5,
+ has_x = 1 << 6, // as second or third char only (0x, -0x, +0x)
+ has_z = 1 << 7,
+ has_colon = 1 << 8,
+ has_plus = 1 << 9,
+ has_minus = 1 << 10,
+ has_dot = 1 << 11,
+ begins_sign = 1 << 12,
+ begins_digit = 1 << 13,
+ begins_zero = 1 << 14,
+
+ signs_msk = has_plus | has_minus,
+ bdigit_msk = has_digits | begins_digit,
+ bzero_msk = bdigit_msk | begins_zero,
+ };
+ value_traits traits = has_nothing;
+ const auto has_any = [&](auto t) noexcept { return (traits & t) != has_nothing; };
+ const auto has_none = [&](auto t) noexcept { return (traits & t) == has_nothing; };
+ const auto add_trait = [&](auto t) noexcept {
+ traits = static_cast<value_traits>(traits | t);
+ };
+
+ // examine the first character to get the 'begins with' traits
+ // (good fail-fast opportunity; all the remaining types begin with numeric digits or signs)
+ if (is_decimal_digit(*cp)) {
+ add_trait(begins_digit);
+ if (*cp == U'0') add_trait(begins_zero);
+ } else if (is_match(*cp, U'+', U'-'))
+ add_trait(begins_sign);
+ else
+ break;
+
+ // scan the rest of the value to determine the remaining traits
+ char32_t chars[utf8_buffered_reader::max_history_length];
+ size_t char_count = {}, advance_count = {};
+ bool eof_while_scanning = false;
+ const auto scan = [&]() noexcept(!TOML_COMPILER_HAS_EXCEPTIONS) {
+ if (is_eof()) return;
+ TOML_ASSERT_ASSUME(!is_value_terminator(*cp));
+
+ do {
+ if (const auto c = **cp; c != U'_') {
+ chars[char_count++] = c;
+
+ if (is_decimal_digit(c))
+ add_trait(has_digits);
+ else if (is_ascii_letter(c)) {
+ TOML_ASSERT_ASSUME((c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z'));
+ switch (static_cast<char32_t>(c | 32u)) {
+ case U'b':
+ if (char_count == 2u && has_any(begins_zero)) add_trait(has_b);
+ break;
+
+ case U'e':
+ if (char_count > 1u &&
+ has_none(has_b | has_o | has_p | has_t | has_x | has_z | has_colon) &&
+ (has_none(has_plus | has_minus) || has_any(begins_sign)))
+ add_trait(has_e);
+ break;
+
+ case U'o':
+ if (char_count == 2u && has_any(begins_zero)) add_trait(has_o);
+ break;
+
+ case U'p':
+ if (has_any(has_x)) add_trait(has_p);
+ break;
+
+ case U'x':
+ if ((char_count == 2u && has_any(begins_zero)) ||
+ (char_count == 3u && has_any(begins_sign) && chars[1] == U'0'))
+ add_trait(has_x);
+ break;
+
+ case U't':
+ add_trait(has_t);
+ break;
+ case U'z':
+ add_trait(has_z);
+ break;
+ }
+ } else if (c <= U':') {
+ TOML_ASSERT_ASSUME(c < U'0' || c > U'9');
+ switch (c) {
+ case U'+':
+ add_trait(has_plus);
+ break;
+ case U'-':
+ add_trait(has_minus);
+ break;
+ case U'.':
+ add_trait(has_dot);
+ break;
+ case U':':
+ add_trait(has_colon);
+ break;
+ }
+ }
+ }
+
+ advance_and_return_if_error();
+ advance_count++;
+ eof_while_scanning = is_eof();
+ } while (advance_count < (utf8_buffered_reader::max_history_length - 1u) && !is_eof() &&
+ !is_value_terminator(*cp));
+ };
+ scan();
+ return_if_error({});
+
+ // force further scanning if this could have been a date-time with a space instead of a T
+ if (char_count == 10u //
+ && (traits | begins_zero) == (bzero_msk | has_minus) //
+ && chars[4] == U'-' //
+ && chars[7] == U'-' //
+ && !is_eof() //
+ && *cp == U' ') {
+ const auto pre_advance_count = advance_count;
+ const auto pre_scan_traits = traits;
+ chars[char_count++] = *cp;
+ add_trait(has_t);
+
+ const auto backpedal = [&]() noexcept {
+ go_back(advance_count - pre_advance_count);
+ advance_count = pre_advance_count;
+ traits = pre_scan_traits;
+ char_count = 10u;
+ };
+
+ advance_and_return_if_error({});
+ advance_count++;
+
+ if (is_eof() || !is_decimal_digit(*cp))
+ backpedal();
+ else {
+ chars[char_count++] = *cp;
+
+ advance_and_return_if_error({});
+ advance_count++;
+
+ scan();
+ return_if_error({});
+
+ if (char_count == 12u) backpedal();
+ }
+ }
+
+ // set the reader back to where we started
+ go_back(advance_count);
+
+ // if after scanning ahead we still only have one value character,
+ // the only valid value type is an integer.
+ if (char_count == 1u) {
+ if (has_any(begins_digit)) {
+ val.reset(new value{static_cast<int64_t>(chars[0] - U'0')});
+ advance(); // skip the digit
+ break;
+ }
+
+ // anything else would be ambiguous.
+ else
+ set_error_and_return_default(eof_while_scanning ? "encountered end-of-file"sv
+ : "could not determine value type"sv);
+ }
+
+ // now things that can be identified from two or more characters
+ return_if_error({});
+ TOML_ASSERT_ASSUME(char_count >= 2u);
+
+ // do some 'fuzzy matching' where there's no ambiguity, since that allows the specific
+ // typed parse functions to take over and show better diagnostics if there's an issue
+ // (as opposed to the fallback "could not determine type" message)
+ if (has_any(has_p))
+ val.reset(new value{parse_hex_float()});
+ else if (has_any(has_x | has_o | has_b)) {
+ int64_t i;
+ value_flags flags;
+ if (has_any(has_x)) {
+ i = parse_integer<16>();
+ flags = value_flags::format_as_hexadecimal;
+ } else if (has_any(has_o)) {
+ i = parse_integer<8>();
+ flags = value_flags::format_as_octal;
+ } else // has_b
+ {
+ i = parse_integer<2>();
+ flags = value_flags::format_as_binary;
+ }
+ return_if_error({});
+
+ val.reset(new value{i});
+ val->ref_cast<int64_t>().flags(flags);
+ } else if (has_any(has_e) || (has_any(begins_digit) && chars[1] == U'.'))
+ val.reset(new value{parse_float()});
+ else if (has_any(begins_sign)) {
+ // single-digit signed integers
+ if (char_count == 2u && has_any(has_digits)) {
+ val.reset(
+ new value{static_cast<int64_t>(chars[1] - U'0') * (chars[0] == U'-' ? -1LL : 1LL)});
+ advance(); // skip the sign
+ advance(); // skip the digit
+ break;
+ }
+
+ // simple signed floats (e.g. +1.0)
+ if (is_decimal_digit(chars[1]) && chars[2] == U'.') val.reset(new value{parse_float()});
+
+ // signed infinity or nan
+ else if (is_match(chars[1], U'i', U'n', U'I', U'N'))
+ val.reset(new value{parse_inf_or_nan()});
+ }
+
+ return_if_error({});
+ if (val) break;
+
+ // match trait masks against what they can match exclusively.
+ // all correct value parses will come out of this list, so doing this as a switch is likely
+ // to be a better friend to the optimizer on the success path (failure path can be slow but
+ // that doesn't matter much).
+ switch (unwrap_enum(traits)) {
+ // binary integers
+ // 0b10
+ case bzero_msk | has_b:
+ val.reset(new value{parse_integer<2>()});
+ val->ref_cast<int64_t>().flags(value_flags::format_as_binary);
+ break;
+
+ // octal integers
+ // 0o10
+ case bzero_msk | has_o:
+ val.reset(new value{parse_integer<8>()});
+ val->ref_cast<int64_t>().flags(value_flags::format_as_octal);
+ break;
+
+ // decimal integers
+ // 00
+ // 10
+ // +10
+ // -10
+ case bzero_msk:
+ [[fallthrough]];
+ case bdigit_msk:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_minus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_plus: {
+ // if the value was so long we exhausted the history buffer it's reasonable to assume
+ // there was more and the value's actual type is impossible to identify without making
+ // the buffer bigger (since it could have actually been a float), so emit an error.
+ //
+ // (this will likely only come up during fuzzing and similar scenarios)
+ static constexpr size_t max_numeric_value_length =
+ utf8_buffered_reader::max_history_length - 2u;
+ if TOML_UNLIKELY (!eof_while_scanning && advance_count > max_numeric_value_length)
+ set_error_and_return_default(
+ "numeric value too long to identify type - cannot exceed "sv,
+ max_numeric_value_length, " characters"sv);
+
+ val.reset(new value{parse_integer<10>()});
+ break;
+ }
+
+ // hexadecimal integers
+ // 0x10
+ case bzero_msk | has_x:
+ val.reset(new value{parse_integer<16>()});
+ val->ref_cast<int64_t>().flags(value_flags::format_as_hexadecimal);
+ break;
+
+ // decimal floats
+ // 0e1
+ // 0e-1
+ // 0e+1
+ // 0.0
+ // 0.0e1
+ // 0.0e-1
+ // 0.0e+1
+ case bzero_msk | has_e:
+ [[fallthrough]];
+ case bzero_msk | has_e | has_minus:
+ [[fallthrough]];
+ case bzero_msk | has_e | has_plus:
+ [[fallthrough]];
+ case bzero_msk | has_dot:
+ [[fallthrough]];
+ case bzero_msk | has_dot | has_e:
+ [[fallthrough]];
+ case bzero_msk | has_dot | has_e | has_minus:
+ [[fallthrough]];
+ case bzero_msk | has_dot | has_e | has_plus:
+ [[fallthrough]];
+ // 1e1
+ // 1e-1
+ // 1e+1
+ // 1.0
+ // 1.0e1
+ // 1.0e-1
+ // 1.0e+1
+ case bdigit_msk | has_e:
+ [[fallthrough]];
+ case bdigit_msk | has_e | has_minus:
+ [[fallthrough]];
+ case bdigit_msk | has_e | has_plus:
+ [[fallthrough]];
+ case bdigit_msk | has_dot:
+ [[fallthrough]];
+ case bdigit_msk | has_dot | has_e:
+ [[fallthrough]];
+ case bdigit_msk | has_dot | has_e | has_minus:
+ [[fallthrough]];
+ case bdigit_msk | has_dot | has_e | has_plus:
+ [[fallthrough]];
+ // +1e1
+ // +1.0
+ // +1.0e1
+ // +1.0e+1
+ // +1.0e-1
+ // -1.0e+1
+ case begins_sign | has_digits | has_e | has_plus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_dot | has_plus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_dot | has_e | has_plus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_dot | has_e | signs_msk:
+ [[fallthrough]];
+ // -1e1
+ // -1e+1
+ // +1e-1
+ // -1.0
+ // -1.0e1
+ // -1.0e-1
+ case begins_sign | has_digits | has_e | has_minus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_e | signs_msk:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_dot | has_minus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_dot | has_e | has_minus:
+ val.reset(new value{parse_float()});
+ break;
+
+ // hexadecimal floats
+ // 0x10p0
+ // 0x10p-0
+ // 0x10p+0
+ case bzero_msk | has_x | has_p:
+ [[fallthrough]];
+ case bzero_msk | has_x | has_p | has_minus:
+ [[fallthrough]];
+ case bzero_msk | has_x | has_p | has_plus:
+ [[fallthrough]];
+ // -0x10p0
+ // -0x10p-0
+ // +0x10p0
+ // +0x10p+0
+ // -0x10p+0
+ // +0x10p-0
+ case begins_sign | has_digits | has_x | has_p | has_minus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_x | has_p | has_plus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_x | has_p | signs_msk:
+ [[fallthrough]];
+ // 0x10.1p0
+ // 0x10.1p-0
+ // 0x10.1p+0
+ case bzero_msk | has_x | has_dot | has_p:
+ [[fallthrough]];
+ case bzero_msk | has_x | has_dot | has_p | has_minus:
+ [[fallthrough]];
+ case bzero_msk | has_x | has_dot | has_p | has_plus:
+ [[fallthrough]];
+ // -0x10.1p0
+ // -0x10.1p-0
+ // +0x10.1p0
+ // +0x10.1p+0
+ // -0x10.1p+0
+ // +0x10.1p-0
+ case begins_sign | has_digits | has_x | has_dot | has_p | has_minus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_x | has_dot | has_p | has_plus:
+ [[fallthrough]];
+ case begins_sign | has_digits | has_x | has_dot | has_p | signs_msk:
+ val.reset(new value{parse_hex_float()});
+ break;
+
+ // times
+ // HH:MM
+ // HH:MM:SS
+ // HH:MM:SS.FFFFFF
+ case bzero_msk | has_colon:
+ [[fallthrough]];
+ case bzero_msk | has_colon | has_dot:
+ [[fallthrough]];
+ case bdigit_msk | has_colon:
+ [[fallthrough]];
+ case bdigit_msk | has_colon | has_dot:
+ val.reset(new value{parse_time()});
+ break;
+
+ // local dates
+ // YYYY-MM-DD
+ case bzero_msk | has_minus:
+ [[fallthrough]];
+ case bdigit_msk | has_minus:
+ val.reset(new value{parse_date()});
+ break;
+
+ // date-times
+ // YYYY-MM-DDTHH:MM
+ // YYYY-MM-DDTHH:MM-HH:MM
+ // YYYY-MM-DDTHH:MM+HH:MM
+ // YYYY-MM-DD HH:MM
+ // YYYY-MM-DD HH:MM-HH:MM
+ // YYYY-MM-DD HH:MM+HH:MM
+ // YYYY-MM-DDTHH:MM:SS
+ // YYYY-MM-DDTHH:MM:SS-HH:MM
+ // YYYY-MM-DDTHH:MM:SS+HH:MM
+ // YYYY-MM-DD HH:MM:SS
+ // YYYY-MM-DD HH:MM:SS-HH:MM
+ // YYYY-MM-DD HH:MM:SS+HH:MM
+ case bzero_msk | has_minus | has_colon | has_t:
+ [[fallthrough]];
+ case bzero_msk | signs_msk | has_colon | has_t:
+ [[fallthrough]];
+ case bdigit_msk | has_minus | has_colon | has_t:
+ [[fallthrough]];
+ case bdigit_msk | signs_msk | has_colon | has_t:
+ [[fallthrough]];
+ // YYYY-MM-DDTHH:MM:SS.FFFFFF
+ // YYYY-MM-DDTHH:MM:SS.FFFFFF-HH:MM
+ // YYYY-MM-DDTHH:MM:SS.FFFFFF+HH:MM
+ // YYYY-MM-DD HH:MM:SS.FFFFFF
+ // YYYY-MM-DD HH:MM:SS.FFFFFF-HH:MM
+ // YYYY-MM-DD HH:MM:SS.FFFFFF+HH:MM
+ case bzero_msk | has_minus | has_colon | has_dot | has_t:
+ [[fallthrough]];
+ case bzero_msk | signs_msk | has_colon | has_dot | has_t:
+ [[fallthrough]];
+ case bdigit_msk | has_minus | has_colon | has_dot | has_t:
+ [[fallthrough]];
+ case bdigit_msk | signs_msk | has_colon | has_dot | has_t:
+ [[fallthrough]];
+ // YYYY-MM-DDTHH:MMZ
+ // YYYY-MM-DD HH:MMZ
+ // YYYY-MM-DDTHH:MM:SSZ
+ // YYYY-MM-DD HH:MM:SSZ
+ // YYYY-MM-DDTHH:MM:SS.FFFFFFZ
+ // YYYY-MM-DD HH:MM:SS.FFFFFFZ
+ case bzero_msk | has_minus | has_colon | has_z | has_t:
+ [[fallthrough]];
+ case bzero_msk | has_minus | has_colon | has_dot | has_z | has_t:
+ [[fallthrough]];
+ case bdigit_msk | has_minus | has_colon | has_z | has_t:
+ [[fallthrough]];
+ case bdigit_msk | has_minus | has_colon | has_dot | has_z | has_t:
+ val.reset(new value{parse_date_time()});
+ break;
+ }
+ } while (false);
+
+ if (!val) {
+ set_error_at(begin_pos, "could not determine value type"sv);
+ return_after_error({});
+ }
+
+ val->source_ = {begin_pos, current_position(1), reader.source_path()};
+ return val;
+ }
+
+ TOML_NEVER_INLINE
+ bool parse_key() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_bare_key_character(*cp) || is_string_delimiter(*cp));
+ push_parse_scope("key"sv);
+
+ key_buffer.clear();
+ recording_whitespace = false;
+
+ while (!is_error()) {
+ std::string_view key_segment;
+ const auto key_begin = current_position();
+
+ // bare_key_segment
+ if (is_bare_key_character(*cp)) key_segment = parse_bare_key_segment();
+
+ // "quoted key segment"
+ else if (is_string_delimiter(*cp)) {
+ const auto begin_pos = cp->position;
+
+ recording_whitespace = true;
+ parsed_string str = parse_string();
+ recording_whitespace = false;
+ return_if_error({});
+
+ if (str.was_multi_line) {
+ set_error_at(begin_pos, "multi-line strings are prohibited in "sv,
+ key_buffer.empty() ? ""sv : "dotted "sv, "keys"sv);
+ return_after_error({});
+ } else
+ key_segment = str.value;
+ }
+
+ // ???
+ else
+ set_error_and_return_default(
+ "expected bare key starting character or string delimiter, saw '"sv, to_sv(*cp),
+ "'"sv);
+
+ const auto key_end = current_position();
+
+ // whitespace following the key segment
+ consume_leading_whitespace();
+
+ // store segment
+ key_buffer.push_back(key_segment, key_begin, key_end);
+
+ // eof or no more key to come
+ if (is_eof() || *cp != U'.') break;
+
+ // was a dotted key - go around again
+ advance_and_return_if_error_or_eof({});
+ consume_leading_whitespace();
+ set_error_and_return_if_eof({});
+ }
+ return_if_error({});
+
+ return true;
+ }
+
+ TOML_NODISCARD
+ key make_key(size_t segment_index) const {
+ TOML_ASSERT(key_buffer.size() > segment_index);
+
+ return key{key_buffer[segment_index],
+ source_region{key_buffer.starts[segment_index], key_buffer.ends[segment_index],
+ root.source().path}};
+ }
+
+ TOML_NODISCARD
+ TOML_NEVER_INLINE
+ table* parse_table_header() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(*cp == U'[');
+ push_parse_scope("table header"sv);
+
+ const source_position header_begin_pos = cp->position;
+ source_position header_end_pos;
+ bool is_arr = false;
+
+ // parse header
+ {
+ // skip first '['
+ advance_and_return_if_error_or_eof({});
+
+ // skip past any whitespace that followed the '['
+ const bool had_leading_whitespace = consume_leading_whitespace();
+ set_error_and_return_if_eof({});
+
+ // skip second '[' (if present)
+ if (*cp == U'[') {
+ if (had_leading_whitespace)
+ set_error_and_return_default(
+ "[[array-of-table]] brackets must be contiguous (i.e. [ [ this ] ] is prohibited)"sv);
+
+ is_arr = true;
+ advance_and_return_if_error_or_eof({});
+
+ // skip past any whitespace that followed the '['
+ consume_leading_whitespace();
+ set_error_and_return_if_eof({});
+ }
+
+ // check for a premature closing ']'
+ if (*cp == U']')
+ set_error_and_return_default("tables with blank bare keys are explicitly prohibited"sv);
+
+ // get the actual key
+ start_recording();
+ parse_key();
+ stop_recording(1u);
+ return_if_error({});
+
+ // skip past any whitespace that followed the key
+ consume_leading_whitespace();
+ return_if_error({});
+ set_error_and_return_if_eof({});
+
+ // consume the closing ']'
+ if (*cp != U']') set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
+ if (is_arr) {
+ advance_and_return_if_error_or_eof({});
+ if (*cp != U']') set_error_and_return_default("expected ']', saw '"sv, to_sv(*cp), "'"sv);
+ }
+ advance_and_return_if_error({});
+ header_end_pos = current_position(1);
+
+ // handle the rest of the line after the header
+ consume_leading_whitespace();
+ if (!is_eof() && !consume_comment() && !consume_line_break())
+ set_error_and_return_default("expected a comment or whitespace, saw '"sv, to_sv(cp),
+ "'"sv);
+ }
+ TOML_ASSERT(!key_buffer.empty());
+
+ // check if each parent is a table/table array, or can be created implicitly as a table.
+ table* parent = &root;
+ for (size_t i = 0, e = key_buffer.size() - 1u; i < e; i++) {
+ const std::string_view segment = key_buffer[i];
+ auto pit = parent->lower_bound(segment);
+
+ // parent already existed
+ if (pit != parent->end() && pit->first == segment) {
+ node& p = pit->second;
+
+ if (auto tbl = p.as_table()) {
+ // adding to closed inline tables is illegal
+ if (tbl->is_inline() &&
+ !impl::find(open_inline_tables.begin(), open_inline_tables.end(), tbl))
+ set_error_and_return_default("cannot insert '"sv, to_sv(recording_buffer),
+ "' into existing inline table"sv);
+
+ parent = tbl;
+ } else if (auto arr = p.as_array();
+ arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) {
+ // table arrays are a special case;
+ // the spec dictates we select the most recently declared element in the array.
+ TOML_ASSERT(!arr->empty());
+ TOML_ASSERT(arr->back().is_table());
+ parent = &arr->back().ref_cast<table>();
+ } else {
+ if (!is_arr && p.type() == node_type::table)
+ set_error_and_return_default("cannot redefine existing table '"sv,
+ to_sv(recording_buffer), "'"sv);
+ else
+ set_error_and_return_default("cannot redefine existing "sv, to_sv(p.type()), " '"sv,
+ to_sv(recording_buffer), "' as "sv,
+ is_arr ? "array-of-tables"sv : "table"sv);
+ }
+ }
+
+ // need to create a new implicit table
+ else {
+ pit = parent->emplace_hint<table>(pit, make_key(i));
+ table& p = pit->second.ref_cast<table>();
+ p.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
+
+ implicit_tables.push_back(&p);
+ parent = &p;
+ }
+ }
+
+ const auto last_segment = key_buffer.back();
+ auto it = parent->lower_bound(last_segment);
+
+ // if there was already a matching node some sanity checking is necessary;
+ // this is ok if we're making an array and the existing element is already an array (new
+ // element) or if we're making a table and the existing element is an implicitly-created table
+ // (promote it), otherwise this is a redefinition error.
+ if (it != parent->end() && it->first == last_segment) {
+ node& matching_node = it->second;
+ if (auto arr = matching_node.as_array();
+ is_arr && arr && impl::find(table_arrays.begin(), table_arrays.end(), arr)) {
+ table& tbl = arr->emplace_back<table>();
+ tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
+ return &tbl;
+ }
+
+ else if (auto tbl = matching_node.as_table(); !is_arr && tbl && !implicit_tables.empty()) {
+ if (auto found = impl::find(implicit_tables.begin(), implicit_tables.end(), tbl); found) {
+ bool ok = true;
+ if (!tbl->empty()) {
+ for (auto& [_, child] : *tbl) {
+ if (!child.is_table() && !child.is_array_of_tables()) {
+ ok = false;
+ break;
+ }
+ }
+ }
+
+ if (ok) {
+ implicit_tables.erase(implicit_tables.cbegin() + (found - implicit_tables.data()));
+ tbl->source_.begin = header_begin_pos;
+ tbl->source_.end = header_end_pos;
+ return tbl;
+ }
+ }
+ }
+
+ // if we get here it's a redefinition error.
+ if (!is_arr && matching_node.type() == node_type::table) {
+ set_error_at(header_begin_pos, "cannot redefine existing table '"sv,
+ to_sv(recording_buffer), "'"sv);
+ return_after_error({});
+ } else {
+ set_error_at(header_begin_pos, "cannot redefine existing "sv, to_sv(matching_node.type()),
+ " '"sv, to_sv(recording_buffer), "' as "sv,
+ is_arr ? "array-of-tables"sv : "table"sv);
+ return_after_error({});
+ }
+ }
+
+ // there was no matching node, sweet - we can freely instantiate a new table/table array.
+ else {
+ auto last_key = make_key(key_buffer.size() - 1u);
+
+ // if it's an array we need to make the array and it's first table element,
+ // set the starting regions, and return the table element
+ if (is_arr) {
+ it = parent->emplace_hint<array>(it, std::move(last_key));
+ array& tbl_arr = it->second.ref_cast<array>();
+ table_arrays.push_back(&tbl_arr);
+ tbl_arr.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
+
+ table& tbl = tbl_arr.emplace_back<table>();
+ tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
+ return &tbl;
+ }
+
+ // otherwise we're just making a table
+ else {
+ it = parent->emplace_hint<table>(it, std::move(last_key));
+ table& tbl = it->second.ref_cast<table>();
+ tbl.source_ = {header_begin_pos, header_end_pos, reader.source_path()};
+ return &tbl;
+ }
+ }
+ }
+
+ TOML_NEVER_INLINE
+ bool parse_key_value_pair_and_insert(table* tbl) {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(is_string_delimiter(*cp) || is_bare_key_character(*cp));
+ push_parse_scope("key-value pair"sv);
+
+ // read the key into the key buffer
+ start_recording();
+ parse_key();
+ stop_recording(1u);
+ return_if_error({});
+ TOML_ASSERT(key_buffer.size() >= 1u);
+
+ // skip past any whitespace that followed the key
+ consume_leading_whitespace();
+ set_error_and_return_if_eof({});
+
+ // '='
+ if (*cp != U'=') set_error_and_return_default("expected '=', saw '"sv, to_sv(*cp), "'"sv);
+ advance_and_return_if_error_or_eof({});
+
+ // skip past any whitespace that followed the '='
+ consume_leading_whitespace();
+ return_if_error({});
+ set_error_and_return_if_eof({});
+
+ // check that the next character could actually be a value
+ if (is_value_terminator(*cp))
+ set_error_and_return_default("expected value, saw '"sv, to_sv(*cp), "'"sv);
+
+ // if it's a dotted kvp we need to spawn the parent sub-tables if necessary,
+ // and set the target table to the second-to-last one in the chain
+ if (key_buffer.size() > 1u) {
+ for (size_t i = 0; i < key_buffer.size() - 1u; i++) {
+ const std::string_view segment = key_buffer[i];
+ auto pit = tbl->lower_bound(segment);
+
+ // parent already existed
+ if (pit != tbl->end() && pit->first == segment) {
+ table* p = pit->second.as_table();
+
+ // redefinition
+ if TOML_UNLIKELY (!p ||
+ !(impl::find(dotted_key_tables.begin(), dotted_key_tables.end(), p) ||
+ impl::find(implicit_tables.begin(), implicit_tables.end(), p))) {
+ set_error_at(key_buffer.starts[i], "cannot redefine existing "sv,
+ to_sv(pit->second.type()), " as dotted key-value pair"sv);
+ return_after_error({});
+ }
+
+ tbl = p;
+ }
+
+ // need to create a new implicit table
+ else {
+ pit = tbl->emplace_hint<table>(pit, make_key(i));
+ table& p = pit->second.ref_cast<table>();
+ p.source_ = pit->first.source();
+
+ dotted_key_tables.push_back(&p);
+ tbl = &p;
+ }
+ }
+ }
+
+ // ensure this isn't a redefinition
+ const std::string_view last_segment = key_buffer.back();
+ auto it = tbl->lower_bound(last_segment);
+ if (it != tbl->end() && it->first == last_segment) {
+ set_error("cannot redefine existing "sv, to_sv(it->second.type()), " '"sv,
+ to_sv(recording_buffer), "'"sv);
+ return_after_error({});
+ }
+
+ // create the key first since the key buffer will likely get overwritten during value parsing
+ // (inline tables)
+ auto last_key = make_key(key_buffer.size() - 1u);
+
+ // now we can actually parse the value
+ node_ptr val = parse_value();
+ return_if_error({});
+
+ tbl->emplace_hint<node_ptr>(it, std::move(last_key), std::move(val));
+ return true;
+ }
+
+ void parse_document() {
+ assert_not_error();
+ assert_not_eof();
+ push_parse_scope("root table"sv);
+
+ table* current_table = &root;
+
+ do {
+ return_if_error();
+
+ // leading whitespace, line endings, comments
+ if (consume_leading_whitespace() || consume_line_break() || consume_comment()) continue;
+ return_if_error();
+
+ // [tables]
+ // [[table array]]
+ if (*cp == U'[') current_table = parse_table_header();
+
+ // bare_keys
+ // dotted.keys
+ // "quoted keys"
+ else if (is_bare_key_character(*cp) || is_string_delimiter(*cp)) {
+ push_parse_scope("key-value pair"sv);
+
+ parse_key_value_pair_and_insert(current_table);
+
+ // handle the rest of the line after the kvp
+ // (this is not done in parse_key_value_pair() because that is also used for inline
+ // tables)
+ consume_leading_whitespace();
+ return_if_error();
+ if (!is_eof() && !consume_comment() && !consume_line_break())
+ set_error("expected a comment or whitespace, saw '"sv, to_sv(cp), "'"sv);
+ }
+
+ else // ??
+ set_error("expected keys, tables, whitespace or comments, saw '"sv, to_sv(cp), "'"sv);
+ } while (!is_eof());
+
+ auto eof_pos = current_position(1);
+ root.source_.end = eof_pos;
+ if (current_table && current_table != &root &&
+ current_table->source_.end <= current_table->source_.begin)
+ current_table->source_.end = eof_pos;
+ }
+
+ static void update_region_ends(node& nde) noexcept {
+ const auto type = nde.type();
+ if (type > node_type::array) return;
+
+ if (type == node_type::table) {
+ auto& tbl = nde.ref_cast<table>();
+ if (tbl.is_inline()) // inline tables (and all their inline descendants) are already
+ // correctly terminated
+ return;
+
+ auto end = nde.source_.end;
+ for (auto&& [k, v] : tbl) {
+ TOML_UNUSED(k);
+ update_region_ends(v);
+ if (end < v.source_.end) end = v.source_.end;
+ }
+ } else // arrays
+ {
+ auto& arr = nde.ref_cast<array>();
+ auto end = nde.source_.end;
+ for (auto&& v : arr) {
+ update_region_ends(v);
+ if (end < v.source_.end) end = v.source_.end;
+ }
+ nde.source_.end = end;
+ }
+ }
+
+ public:
+ parser(utf8_reader_interface&& reader_) //
+ : reader{reader_} {
+ root.source_ = {prev_pos, prev_pos, reader.source_path()};
+
+ if (!reader.peek_eof()) {
+ cp = reader.read_next();
#if !TOML_EXCEPTIONS
- if (reader.error())
- {
- err = std::move(reader.error());
- return;
- }
+ if (reader.error()) {
+ err = std::move(reader.error());
+ return;
+ }
#endif
- if (cp)
- parse_document();
- }
+ if (cp) parse_document();
+ }
- update_region_ends(root);
- }
+ update_region_ends(root);
+ }
- TOML_NODISCARD
- operator parse_result() && noexcept
- {
+ TOML_NODISCARD
+ operator parse_result() && noexcept {
#if TOML_EXCEPTIONS
- return { std::move(root) };
+ return {std::move(root)};
#else
- if (err)
- return parse_result{ *std::move(err) };
- else
- return parse_result{ std::move(root) };
+ if (err)
+ return parse_result{*std::move(err)};
+ else
+ return parse_result{std::move(root)};
#endif
- }
- };
-
- TOML_EXTERNAL_LINKAGE
- node_ptr parser::parse_array()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(*cp == U'[');
- push_parse_scope("array"sv);
-
- // skip opening '['
- advance_and_return_if_error_or_eof({});
-
- node_ptr arr_ptr{ new array{} };
- array& arr = arr_ptr->ref_cast<array>();
- enum class TOML_CLOSED_ENUM parse_type : int
- {
- none,
- comma,
- val
- };
- parse_type prev = parse_type::none;
-
- while (!is_error())
- {
- while (consume_leading_whitespace() || consume_line_break() || consume_comment())
- continue;
- set_error_and_return_if_eof({});
-
- // commas - only legal after a value
- if (*cp == U',')
- {
- if (prev == parse_type::val)
- {
- prev = parse_type::comma;
- advance_and_return_if_error_or_eof({});
- continue;
- }
- set_error_and_return_default("expected value or closing ']', saw comma"sv);
- }
-
- // closing ']'
- else if (*cp == U']')
- {
- advance_and_return_if_error({});
- break;
- }
-
- // must be a value
- else
- {
- if (prev == parse_type::val)
- {
- set_error_and_return_default("expected comma or closing ']', saw '"sv, to_sv(*cp), "'"sv);
- continue;
- }
- prev = parse_type::val;
-
- auto val = parse_value();
- return_if_error({});
-
- if (!arr.capacity())
- arr.reserve(4u);
- arr.emplace_back<node_ptr>(std::move(val));
- }
- }
-
- return_if_error({});
- return arr_ptr;
- }
-
- TOML_EXTERNAL_LINKAGE
- node_ptr parser::parse_inline_table()
- {
- return_if_error({});
- assert_not_eof();
- TOML_ASSERT_ASSUME(*cp == U'{');
- push_parse_scope("inline table"sv);
-
- // skip opening '{'
- advance_and_return_if_error_or_eof({});
-
- node_ptr tbl_ptr{ new table{} };
- table& tbl = tbl_ptr->ref_cast<table>();
- tbl.is_inline(true);
- table_vector_scope table_scope{ open_inline_tables, tbl };
-
- enum class TOML_CLOSED_ENUM parse_type : int
- {
- none,
- comma,
- kvp
- };
- parse_type prev = parse_type::none;
- while (!is_error())
- {
- if constexpr (TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables)
- {
- while (consume_leading_whitespace() || consume_line_break() || consume_comment())
- continue;
- }
- else
- {
- while (consume_leading_whitespace())
- continue;
- }
- return_if_error({});
- set_error_and_return_if_eof({});
-
- // commas - only legal after a key-value pair
- if (*cp == U',')
- {
- if (prev == parse_type::kvp)
- {
- prev = parse_type::comma;
- advance_and_return_if_error_or_eof({});
- }
- else
- set_error_and_return_default("expected key-value pair or closing '}', saw comma"sv);
- }
-
- // closing '}'
- else if (*cp == U'}')
- {
- if constexpr (!TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline tables)
- {
- if (prev == parse_type::comma)
- {
- set_error_and_return_default("expected key-value pair, saw closing '}' (dangling comma)"sv);
- continue;
- }
- }
- advance_and_return_if_error({});
- break;
- }
-
- // key-value pair
- else if (is_string_delimiter(*cp) || is_bare_key_character(*cp))
- {
- if (prev == parse_type::kvp)
- set_error_and_return_default("expected comma or closing '}', saw '"sv, to_sv(*cp), "'"sv);
- else
- {
- prev = parse_type::kvp;
- parse_key_value_pair_and_insert(&tbl);
- }
- }
-
- /// ???
- else
- set_error_and_return_default("expected key or closing '}', saw '"sv, to_sv(*cp), "'"sv);
- }
-
- return_if_error({});
- return tbl_ptr;
- }
-
- TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
+ }
+ };
+
+ TOML_EXTERNAL_LINKAGE
+ node_ptr parser::parse_array() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(*cp == U'[');
+ push_parse_scope("array"sv);
+
+ // skip opening '['
+ advance_and_return_if_error_or_eof({});
+
+ node_ptr arr_ptr{new array{}};
+ array& arr = arr_ptr->ref_cast<array>();
+ enum class TOML_CLOSED_ENUM parse_type : int { none, comma, val };
+ parse_type prev = parse_type::none;
+
+ while (!is_error()) {
+ while (consume_leading_whitespace() || consume_line_break() || consume_comment()) continue;
+ set_error_and_return_if_eof({});
+
+ // commas - only legal after a value
+ if (*cp == U',') {
+ if (prev == parse_type::val) {
+ prev = parse_type::comma;
+ advance_and_return_if_error_or_eof({});
+ continue;
+ }
+ set_error_and_return_default("expected value or closing ']', saw comma"sv);
+ }
+
+ // closing ']'
+ else if (*cp == U']') {
+ advance_and_return_if_error({});
+ break;
+ }
+
+ // must be a value
+ else {
+ if (prev == parse_type::val) {
+ set_error_and_return_default("expected comma or closing ']', saw '"sv, to_sv(*cp), "'"sv);
+ continue;
+ }
+ prev = parse_type::val;
+
+ auto val = parse_value();
+ return_if_error({});
+
+ if (!arr.capacity()) arr.reserve(4u);
+ arr.emplace_back<node_ptr>(std::move(val));
+ }
+ }
+
+ return_if_error({});
+ return arr_ptr;
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ node_ptr parser::parse_inline_table() {
+ return_if_error({});
+ assert_not_eof();
+ TOML_ASSERT_ASSUME(*cp == U'{');
+ push_parse_scope("inline table"sv);
+
+ // skip opening '{'
+ advance_and_return_if_error_or_eof({});
+
+ node_ptr tbl_ptr{new table{}};
+ table& tbl = tbl_ptr->ref_cast<table>();
+ tbl.is_inline(true);
+ table_vector_scope table_scope{open_inline_tables, tbl};
+
+ enum class TOML_CLOSED_ENUM parse_type : int { none, comma, kvp };
+ parse_type prev = parse_type::none;
+ while (!is_error()) {
+ if constexpr (TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline
+ // tables)
+ {
+ while (consume_leading_whitespace() || consume_line_break() || consume_comment()) continue;
+ } else {
+ while (consume_leading_whitespace()) continue;
+ }
+ return_if_error({});
+ set_error_and_return_if_eof({});
+
+ // commas - only legal after a key-value pair
+ if (*cp == U',') {
+ if (prev == parse_type::kvp) {
+ prev = parse_type::comma;
+ advance_and_return_if_error_or_eof({});
+ } else
+ set_error_and_return_default("expected key-value pair or closing '}', saw comma"sv);
+ }
+
+ // closing '}'
+ else if (*cp == U'}') {
+ if constexpr (!TOML_LANG_UNRELEASED) // toml/issues/516 (newlines/trailing commas in inline
+ // tables)
+ {
+ if (prev == parse_type::comma) {
+ set_error_and_return_default(
+ "expected key-value pair, saw closing '}' (dangling comma)"sv);
+ continue;
+ }
+ }
+ advance_and_return_if_error({});
+ break;
+ }
+
+ // key-value pair
+ else if (is_string_delimiter(*cp) || is_bare_key_character(*cp)) {
+ if (prev == parse_type::kvp)
+ set_error_and_return_default("expected comma or closing '}', saw '"sv, to_sv(*cp), "'"sv);
+ else {
+ prev = parse_type::kvp;
+ parse_key_value_pair_and_insert(&tbl);
+ }
+ }
+
+ /// ???
+ else
+ set_error_and_return_default("expected key or closing '}', saw '"sv, to_sv(*cp), "'"sv);
+ }
+
+ return_if_error({});
+ return tbl_ptr;
+ }
+
+ TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
}
TOML_IMPL_NAMESPACE_END;
@@ -3747,171 +3354,160 @@ TOML_IMPL_NAMESPACE_END;
#undef utf8_reader_error_check
#undef utf8_reader_return_after_error
-//#---------------------------------------------------------------------------------------------------------------------
-//# PARSER PUBLIC IMPLEMENTATION
-//#---------------------------------------------------------------------------------------------------------------------
-
-TOML_ANON_NAMESPACE_START
-{
- TOML_NODISCARD
- TOML_INTERNAL_LINKAGE
- parse_result do_parse(utf8_reader_interface && reader)
- {
- return impl::parser{ std::move(reader) };
- }
-
- TOML_NODISCARD
- TOML_INTERNAL_LINKAGE
- parse_result do_parse_file(std::string_view file_path)
- {
+// #---------------------------------------------------------------------------------------------------------------------
+// # PARSER PUBLIC IMPLEMENTATION
+// #---------------------------------------------------------------------------------------------------------------------
+
+TOML_ANON_NAMESPACE_START {
+ TOML_NODISCARD
+ TOML_INTERNAL_LINKAGE
+ parse_result do_parse(utf8_reader_interface && reader) {
+ return impl::parser{std::move(reader)};
+ }
+
+ TOML_NODISCARD
+ TOML_INTERNAL_LINKAGE
+ parse_result do_parse_file(std::string_view file_path) {
#if TOML_EXCEPTIONS
-#define TOML_PARSE_FILE_ERROR(msg, path) \
- throw parse_error{ msg, source_position{}, std::make_shared<const std::string>(std::move(path)) }
+#define TOML_PARSE_FILE_ERROR(msg, path) \
+ throw parse_error { \
+ msg, source_position{}, std::make_shared<const std::string>(std::move(path)) \
+ }
#else
-#define TOML_PARSE_FILE_ERROR(msg, path) \
- return parse_result \
- { \
- parse_error \
- { \
- msg, source_position{}, std::make_shared<const std::string>(std::move(path)) \
- } \
- }
+#define TOML_PARSE_FILE_ERROR(msg, path) \
+ return parse_result { \
+ parse_error { \
+ msg, source_position{}, std::make_shared<const std::string>(std::move(path)) \
+ } \
+ }
#endif
- std::string file_path_str(file_path);
+ std::string file_path_str(file_path);
- // open file with a custom-sized stack buffer
- std::ifstream file;
- TOML_OVERALIGNED char file_buffer[sizeof(void*) * 1024u];
- file.rdbuf()->pubsetbuf(file_buffer, sizeof(file_buffer));
+ // open file with a custom-sized stack buffer
+ std::ifstream file;
+ TOML_OVERALIGNED char file_buffer[sizeof(void*) * 1024u];
+ file.rdbuf()->pubsetbuf(file_buffer, sizeof(file_buffer));
#if TOML_WINDOWS
- file.open(impl::widen(file_path_str).c_str(), std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
+ file.open(impl::widen(file_path_str).c_str(),
+ std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
#else
- file.open(file_path_str, std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
+ file.open(file_path_str, std::ifstream::in | std::ifstream::binary | std::ifstream::ate);
#endif
- if (!file.is_open())
- TOML_PARSE_FILE_ERROR("File could not be opened for reading", file_path_str);
-
- // get size
- const auto file_size = file.tellg();
- if (file_size == -1)
- TOML_PARSE_FILE_ERROR("Could not determine file size", file_path_str);
- file.seekg(0, std::ifstream::beg);
-
- // read the whole file into memory first if the file isn't too large
- constexpr auto large_file_threshold = 1024 * 1024 * 2; // 2 MB
- if (file_size <= large_file_threshold)
- {
- std::vector<char> file_data;
- file_data.resize(static_cast<size_t>(file_size));
- file.read(file_data.data(), static_cast<std::streamsize>(file_size));
- return parse(std::string_view{ file_data.data(), file_data.size() }, std::move(file_path_str));
- }
-
- // otherwise parse it using the streams
- else
- return parse(file, std::move(file_path_str));
+ if (!file.is_open())
+ TOML_PARSE_FILE_ERROR("File could not be opened for reading", file_path_str);
+
+ // get size
+ const auto file_size = file.tellg();
+ if (file_size == -1) TOML_PARSE_FILE_ERROR("Could not determine file size", file_path_str);
+ file.seekg(0, std::ifstream::beg);
+
+ // read the whole file into memory first if the file isn't too large
+ constexpr auto large_file_threshold = 1024 * 1024 * 2; // 2 MB
+ if (file_size <= large_file_threshold) {
+ std::vector<char> file_data;
+ file_data.resize(static_cast<size_t>(file_size));
+ file.read(file_data.data(), static_cast<std::streamsize>(file_size));
+ return parse(std::string_view{file_data.data(), file_data.size()}, std::move(file_path_str));
+ }
+
+ // otherwise parse it using the streams
+ else
+ return parse(file, std::move(file_path_str));
#undef TOML_PARSE_FILE_ERROR
- }
+ }
}
TOML_ANON_NAMESPACE_END;
-TOML_NAMESPACE_START
-{
- TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex);
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::string_view doc, std::string && source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::istream & doc, std::string_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::istream & doc, std::string && source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse_file(std::string_view file_path)
- {
- return TOML_ANON_NAMESPACE::do_parse_file(file_path);
- }
+TOML_NAMESPACE_START {
+ TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex);
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::string_view doc, std::string_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::string_view doc, std::string && source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::istream & doc, std::string_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::istream & doc, std::string && source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse_file(std::string_view file_path) {
+ return TOML_ANON_NAMESPACE::do_parse_file(file_path);
+ }
#if TOML_HAS_CHAR8
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, source_path });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string && source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, std::move(source_path) });
- }
-
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse_file(std::u8string_view file_path)
- {
- std::string file_path_str;
- file_path_str.resize(file_path.length());
- memcpy(file_path_str.data(), file_path.data(), file_path.length());
- return TOML_ANON_NAMESPACE::do_parse_file(file_path_str);
- }
-
-#endif // TOML_HAS_CHAR8
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{doc, source_path});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::u8string_view doc, std::string && source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, std::move(source_path)});
+ }
+
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse_file(std::u8string_view file_path) {
+ std::string file_path_str;
+ file_path_str.resize(file_path.length());
+ memcpy(file_path_str.data(), file_path.data(), file_path.length());
+ return TOML_ANON_NAMESPACE::do_parse_file(file_path_str);
+ }
+
+#endif // TOML_HAS_CHAR8
#if TOML_ENABLE_WINDOWS_COMPAT
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::string_view doc, std::wstring_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) });
- }
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::string_view doc, std::wstring_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
+ }
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::istream & doc, std::wstring_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) });
- }
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::istream & doc, std::wstring_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
+ }
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse_file(std::wstring_view file_path)
- {
- return TOML_ANON_NAMESPACE::do_parse_file(impl::narrow(file_path));
- }
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse_file(std::wstring_view file_path) {
+ return TOML_ANON_NAMESPACE::do_parse_file(impl::narrow(file_path));
+ }
-#endif // TOML_ENABLE_WINDOWS_COMPAT
+#endif // TOML_ENABLE_WINDOWS_COMPAT
#if TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT
- TOML_EXTERNAL_LINKAGE
- parse_result TOML_CALLCONV parse(std::u8string_view doc, std::wstring_view source_path)
- {
- return TOML_ANON_NAMESPACE::do_parse(TOML_ANON_NAMESPACE::utf8_reader{ doc, impl::narrow(source_path) });
- }
+ TOML_EXTERNAL_LINKAGE
+ parse_result TOML_CALLCONV parse(std::u8string_view doc, std::wstring_view source_path) {
+ return TOML_ANON_NAMESPACE::do_parse(
+ TOML_ANON_NAMESPACE::utf8_reader{doc, impl::narrow(source_path)});
+ }
-#endif // TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT
+#endif // TOML_HAS_CHAR8 && TOML_ENABLE_WINDOWS_COMPAT
- TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
+ TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS
}
TOML_NAMESPACE_END;
#undef TOML_OVERALIGNED
#include "header_end.hpp"
-#endif // TOML_ENABLE_PARSER
+#endif // TOML_ENABLE_PARSER