summaryrefslogtreecommitdiffhomepage
path: root/vendor/toml++/impl/unicode.hpp
diff options
context:
space:
mode:
authorAmlal El Mahrouss <amlal@nekernel.org>2026-03-08 15:50:45 +0100
committerGitHub <noreply@github.com>2026-03-08 15:50:45 +0100
commit386b6ba6702aaf121a8667b68fba86385dad68ed (patch)
tree985c7eda4fafa827eaad88b6b469b0baba791817 /vendor/toml++/impl/unicode.hpp
parent7a469801ecb55fcde0199d4e41b1cec3a17dcb05 (diff)
parentddb1cbc831b6d13b985d91022f01e955e24ae871 (diff)
Merge pull request #25 from ne-foss-org/nebuild-patches-deref
[CHORE] Patching TOML manifest parser to avoid null deref.
Diffstat (limited to 'vendor/toml++/impl/unicode.hpp')
-rw-r--r--vendor/toml++/impl/unicode.hpp341
1 files changed, 155 insertions, 186 deletions
diff --git a/vendor/toml++/impl/unicode.hpp b/vendor/toml++/impl/unicode.hpp
index 9bad395..3b399c3 100644
--- a/vendor/toml++/impl/unicode.hpp
+++ b/vendor/toml++/impl/unicode.hpp
@@ -1,195 +1,164 @@
-//# This file is a part of toml++ and is subject to the the terms of the MIT license.
-//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
-//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
-// SPDX-License-Identifier: MIT
+// # This file is a part of toml++ and is subject to the the terms of the MIT license.
+// # Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
+// # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
+// SPDX-License-Identifier: MIT
#pragma once
-#include "unicode_autogenerated.hpp"
#include "header_start.hpp"
+#include "unicode_autogenerated.hpp"
/// \cond
-TOML_IMPL_NAMESPACE_START
-{
- TOML_CONST_GETTER
- constexpr bool is_string_delimiter(char32_t c) noexcept
- {
- return c == U'"' || c == U'\'';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_ascii_letter(char32_t c) noexcept
- {
- return (c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z');
- }
-
- TOML_CONST_GETTER
- constexpr bool is_binary_digit(char32_t c) noexcept
- {
- return c == U'0' || c == U'1';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_octal_digit(char32_t c) noexcept
- {
- return (c >= U'0' && c <= U'7');
- }
-
- TOML_CONST_GETTER
- constexpr bool is_decimal_digit(char32_t c) noexcept
- {
- return (c >= U'0' && c <= U'9');
- }
-
- TOML_CONST_GETTER
- constexpr bool is_hexadecimal_digit(char32_t c) noexcept
- {
- return U'0' <= c && c <= U'f' && (1ull << (static_cast<uint_least64_t>(c) - 0x30u)) & 0x7E0000007E03FFull;
- }
-
- template <typename T>
- TOML_CONST_GETTER
- constexpr uint_least32_t hex_to_dec(const T c) noexcept
- {
- if constexpr (std::is_same_v<remove_cvref<T>, uint_least32_t>)
- return c >= 0x41u // >= 'A'
- ? 10u + (c | 0x20u) - 0x61u // - 'a'
- : c - 0x30u // - '0'
- ;
- else
- return hex_to_dec(static_cast<uint_least32_t>(c));
- }
-
- TOML_CONST_GETTER
- constexpr bool is_horizontal_whitespace(char32_t c) noexcept
- {
- return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c);
- }
-
- TOML_CONST_GETTER
- constexpr bool is_vertical_whitespace(char32_t c) noexcept
- {
- return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c);
- }
-
- TOML_CONST_GETTER
- constexpr bool is_whitespace(char32_t c) noexcept
- {
- return is_horizontal_whitespace(c) || is_vertical_whitespace(c);
- }
-
- TOML_CONST_GETTER
- constexpr bool is_bare_key_character(char32_t c) noexcept
- {
- return is_ascii_bare_key_character(c)
-#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys)
- || is_non_ascii_bare_key_character(c)
+TOML_IMPL_NAMESPACE_START {
+ TOML_CONST_GETTER
+ constexpr bool is_string_delimiter(char32_t c) noexcept {
+ return c == U'"' || c == U'\'';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_ascii_letter(char32_t c) noexcept {
+ return (c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z');
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_binary_digit(char32_t c) noexcept {
+ return c == U'0' || c == U'1';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_octal_digit(char32_t c) noexcept {
+ return (c >= U'0' && c <= U'7');
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_decimal_digit(char32_t c) noexcept {
+ return (c >= U'0' && c <= U'9');
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_hexadecimal_digit(char32_t c) noexcept {
+ return U'0' <= c && c <= U'f' &&
+ (1ull << (static_cast<uint_least64_t>(c) - 0x30u)) & 0x7E0000007E03FFull;
+ }
+
+ template <typename T>
+ TOML_CONST_GETTER constexpr uint_least32_t hex_to_dec(const T c) noexcept {
+ if constexpr (std::is_same_v<remove_cvref<T>, uint_least32_t>)
+ return c >= 0x41u // >= 'A'
+ ? 10u + (c | 0x20u) - 0x61u // - 'a'
+ : c - 0x30u // - '0'
+ ;
+ else
+ return hex_to_dec(static_cast<uint_least32_t>(c));
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_horizontal_whitespace(char32_t c) noexcept {
+ return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c);
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_vertical_whitespace(char32_t c) noexcept {
+ return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c);
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_whitespace(char32_t c) noexcept {
+ return is_horizontal_whitespace(c) || is_vertical_whitespace(c);
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_bare_key_character(char32_t c) noexcept {
+ return is_ascii_bare_key_character(c)
+#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys)
+ || is_non_ascii_bare_key_character(c)
#endif
- ;
- }
-
- TOML_CONST_GETTER
- constexpr bool is_value_terminator(char32_t c) noexcept
- {
- return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_control_character(char c) noexcept
- {
- return c <= '\u001F' || c == '\u007F';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_control_character(char32_t c) noexcept
- {
- return c <= U'\u001F' || c == U'\u007F';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_nontab_control_character(char32_t c) noexcept
- {
- return c <= U'\u0008' || (c >= U'\u000A' && c <= U'\u001F') || c == U'\u007F';
- }
-
- TOML_CONST_GETTER
- constexpr bool is_unicode_surrogate(char32_t c) noexcept
- {
- return c >= 0xD800u && c <= 0xDFFF;
- }
-
- struct utf8_decoder
- {
- // utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
- // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-
- uint_least32_t state{};
- char32_t codepoint{};
-
- static constexpr uint8_t state_table[]{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6,
- 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-
- 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12,
- 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12,
- 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
- };
-
- TOML_PURE_INLINE_GETTER
- constexpr bool error() const noexcept
- {
- return state == uint_least32_t{ 12u };
- }
-
- TOML_PURE_INLINE_GETTER
- constexpr bool has_code_point() const noexcept
- {
- return state == uint_least32_t{};
- }
-
- TOML_PURE_INLINE_GETTER
- constexpr bool needs_more_input() const noexcept
- {
- return !has_code_point() && !error();
- }
-
- constexpr void operator()(uint8_t byte) noexcept
- {
- TOML_ASSERT_ASSUME(!error());
-
- const auto type = state_table[byte];
-
- codepoint = static_cast<char32_t>(has_code_point() ? (uint_least32_t{ 255u } >> type) & byte
- : (byte & uint_least32_t{ 63u })
- | (static_cast<uint_least32_t>(codepoint) << 6));
-
- state = state_table[state + uint_least32_t{ 256u } + type];
- }
-
- TOML_ALWAYS_INLINE
- constexpr void operator()(char c) noexcept
- {
- operator()(static_cast<uint8_t>(c));
- }
-
- TOML_ALWAYS_INLINE
- constexpr void reset() noexcept
- {
- state = {};
- }
- };
-
- TOML_PURE_GETTER
- TOML_ATTR(nonnull)
- bool is_ascii(const char* str, size_t len) noexcept;
+ ;
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_value_terminator(char32_t c) noexcept {
+ return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_control_character(char c) noexcept {
+ return c <= '\u001F' || c == '\u007F';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_control_character(char32_t c) noexcept {
+ return c <= U'\u001F' || c == U'\u007F';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_nontab_control_character(char32_t c) noexcept {
+ return c <= U'\u0008' || (c >= U'\u000A' && c <= U'\u001F') || c == U'\u007F';
+ }
+
+ TOML_CONST_GETTER
+ constexpr bool is_unicode_surrogate(char32_t c) noexcept {
+ return c >= 0xD800u && c <= 0xDFFF;
+ }
+
+ struct utf8_decoder {
+ // utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+ uint_least32_t state{};
+ char32_t codepoint{};
+
+ static constexpr uint8_t state_table[]{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6,
+ 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
+ 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24,
+ 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12,
+ 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12,
+ 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12};
+
+ TOML_PURE_INLINE_GETTER
+ constexpr bool error() const noexcept { return state == uint_least32_t{12u}; }
+
+ TOML_PURE_INLINE_GETTER
+ constexpr bool has_code_point() const noexcept { return state == uint_least32_t{}; }
+
+ TOML_PURE_INLINE_GETTER
+ constexpr bool needs_more_input() const noexcept { return !has_code_point() && !error(); }
+
+ constexpr void operator()(uint8_t byte) noexcept {
+ TOML_ASSERT_ASSUME(!error());
+
+ const auto type = state_table[byte];
+
+ codepoint = static_cast<char32_t>(has_code_point()
+ ? (uint_least32_t{255u} >> type) & byte
+ : (byte & uint_least32_t{63u}) |
+ (static_cast<uint_least32_t>(codepoint) << 6));
+
+ state = state_table[state + uint_least32_t{256u} + type];
+ }
+
+ TOML_ALWAYS_INLINE
+ constexpr void operator()(char c) noexcept { operator()(static_cast<uint8_t>(c)); }
+
+ TOML_ALWAYS_INLINE
+ constexpr void reset() noexcept { state = {}; }
+ };
+
+ TOML_PURE_GETTER
+ TOML_ATTR(nonnull)
+ bool is_ascii(const char* str, size_t len) noexcept;
}
TOML_IMPL_NAMESPACE_END;