diff options
| author | Amlal El Mahrouss <amlal@nekernel.org> | 2026-03-08 15:50:45 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-03-08 15:50:45 +0100 |
| commit | 386b6ba6702aaf121a8667b68fba86385dad68ed (patch) | |
| tree | 985c7eda4fafa827eaad88b6b469b0baba791817 /vendor/toml++/impl/unicode.hpp | |
| parent | 7a469801ecb55fcde0199d4e41b1cec3a17dcb05 (diff) | |
| parent | ddb1cbc831b6d13b985d91022f01e955e24ae871 (diff) | |
Merge pull request #25 from ne-foss-org/nebuild-patches-deref
[CHORE] Patching TOML manifest parser to avoid null deref.
Diffstat (limited to 'vendor/toml++/impl/unicode.hpp')
| -rw-r--r-- | vendor/toml++/impl/unicode.hpp | 341 |
1 files changed, 155 insertions, 186 deletions
diff --git a/vendor/toml++/impl/unicode.hpp b/vendor/toml++/impl/unicode.hpp index 9bad395..3b399c3 100644 --- a/vendor/toml++/impl/unicode.hpp +++ b/vendor/toml++/impl/unicode.hpp @@ -1,195 +1,164 @@ -//# This file is a part of toml++ and is subject to the the terms of the MIT license. -//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au> -//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. -// SPDX-License-Identifier: MIT +// # This file is a part of toml++ and is subject to the the terms of the MIT license. +// # Copyright (c) Mark Gillard <mark.gillard@outlook.com.au> +// # See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text. +// SPDX-License-Identifier: MIT #pragma once -#include "unicode_autogenerated.hpp" #include "header_start.hpp" +#include "unicode_autogenerated.hpp" /// \cond -TOML_IMPL_NAMESPACE_START -{ - TOML_CONST_GETTER - constexpr bool is_string_delimiter(char32_t c) noexcept - { - return c == U'"' || c == U'\''; - } - - TOML_CONST_GETTER - constexpr bool is_ascii_letter(char32_t c) noexcept - { - return (c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z'); - } - - TOML_CONST_GETTER - constexpr bool is_binary_digit(char32_t c) noexcept - { - return c == U'0' || c == U'1'; - } - - TOML_CONST_GETTER - constexpr bool is_octal_digit(char32_t c) noexcept - { - return (c >= U'0' && c <= U'7'); - } - - TOML_CONST_GETTER - constexpr bool is_decimal_digit(char32_t c) noexcept - { - return (c >= U'0' && c <= U'9'); - } - - TOML_CONST_GETTER - constexpr bool is_hexadecimal_digit(char32_t c) noexcept - { - return U'0' <= c && c <= U'f' && (1ull << (static_cast<uint_least64_t>(c) - 0x30u)) & 0x7E0000007E03FFull; - } - - template <typename T> - TOML_CONST_GETTER - constexpr uint_least32_t hex_to_dec(const T c) noexcept - { - if constexpr (std::is_same_v<remove_cvref<T>, uint_least32_t>) - return c >= 0x41u // >= 'A' - ? 10u + (c | 0x20u) - 0x61u // - 'a' - : c - 0x30u // - '0' - ; - else - return hex_to_dec(static_cast<uint_least32_t>(c)); - } - - TOML_CONST_GETTER - constexpr bool is_horizontal_whitespace(char32_t c) noexcept - { - return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c); - } - - TOML_CONST_GETTER - constexpr bool is_vertical_whitespace(char32_t c) noexcept - { - return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c); - } - - TOML_CONST_GETTER - constexpr bool is_whitespace(char32_t c) noexcept - { - return is_horizontal_whitespace(c) || is_vertical_whitespace(c); - } - - TOML_CONST_GETTER - constexpr bool is_bare_key_character(char32_t c) noexcept - { - return is_ascii_bare_key_character(c) -#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys) - || is_non_ascii_bare_key_character(c) +TOML_IMPL_NAMESPACE_START { + TOML_CONST_GETTER + constexpr bool is_string_delimiter(char32_t c) noexcept { + return c == U'"' || c == U'\''; + } + + TOML_CONST_GETTER + constexpr bool is_ascii_letter(char32_t c) noexcept { + return (c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z'); + } + + TOML_CONST_GETTER + constexpr bool is_binary_digit(char32_t c) noexcept { + return c == U'0' || c == U'1'; + } + + TOML_CONST_GETTER + constexpr bool is_octal_digit(char32_t c) noexcept { + return (c >= U'0' && c <= U'7'); + } + + TOML_CONST_GETTER + constexpr bool is_decimal_digit(char32_t c) noexcept { + return (c >= U'0' && c <= U'9'); + } + + TOML_CONST_GETTER + constexpr bool is_hexadecimal_digit(char32_t c) noexcept { + return U'0' <= c && c <= U'f' && + (1ull << (static_cast<uint_least64_t>(c) - 0x30u)) & 0x7E0000007E03FFull; + } + + template <typename T> + TOML_CONST_GETTER constexpr uint_least32_t hex_to_dec(const T c) noexcept { + if constexpr (std::is_same_v<remove_cvref<T>, uint_least32_t>) + return c >= 0x41u // >= 'A' + ? 10u + (c | 0x20u) - 0x61u // - 'a' + : c - 0x30u // - '0' + ; + else + return hex_to_dec(static_cast<uint_least32_t>(c)); + } + + TOML_CONST_GETTER + constexpr bool is_horizontal_whitespace(char32_t c) noexcept { + return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c); + } + + TOML_CONST_GETTER + constexpr bool is_vertical_whitespace(char32_t c) noexcept { + return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c); + } + + TOML_CONST_GETTER + constexpr bool is_whitespace(char32_t c) noexcept { + return is_horizontal_whitespace(c) || is_vertical_whitespace(c); + } + + TOML_CONST_GETTER + constexpr bool is_bare_key_character(char32_t c) noexcept { + return is_ascii_bare_key_character(c) +#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys) + || is_non_ascii_bare_key_character(c) #endif - ; - } - - TOML_CONST_GETTER - constexpr bool is_value_terminator(char32_t c) noexcept - { - return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#'; - } - - TOML_CONST_GETTER - constexpr bool is_control_character(char c) noexcept - { - return c <= '\u001F' || c == '\u007F'; - } - - TOML_CONST_GETTER - constexpr bool is_control_character(char32_t c) noexcept - { - return c <= U'\u001F' || c == U'\u007F'; - } - - TOML_CONST_GETTER - constexpr bool is_nontab_control_character(char32_t c) noexcept - { - return c <= U'\u0008' || (c >= U'\u000A' && c <= U'\u001F') || c == U'\u007F'; - } - - TOML_CONST_GETTER - constexpr bool is_unicode_surrogate(char32_t c) noexcept - { - return c >= 0xD800u && c <= 0xDFFF; - } - - struct utf8_decoder - { - // utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> - - uint_least32_t state{}; - char32_t codepoint{}; - - static constexpr uint8_t state_table[]{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, - 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - - 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12, - 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, - 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 - }; - - TOML_PURE_INLINE_GETTER - constexpr bool error() const noexcept - { - return state == uint_least32_t{ 12u }; - } - - TOML_PURE_INLINE_GETTER - constexpr bool has_code_point() const noexcept - { - return state == uint_least32_t{}; - } - - TOML_PURE_INLINE_GETTER - constexpr bool needs_more_input() const noexcept - { - return !has_code_point() && !error(); - } - - constexpr void operator()(uint8_t byte) noexcept - { - TOML_ASSERT_ASSUME(!error()); - - const auto type = state_table[byte]; - - codepoint = static_cast<char32_t>(has_code_point() ? (uint_least32_t{ 255u } >> type) & byte - : (byte & uint_least32_t{ 63u }) - | (static_cast<uint_least32_t>(codepoint) << 6)); - - state = state_table[state + uint_least32_t{ 256u } + type]; - } - - TOML_ALWAYS_INLINE - constexpr void operator()(char c) noexcept - { - operator()(static_cast<uint8_t>(c)); - } - - TOML_ALWAYS_INLINE - constexpr void reset() noexcept - { - state = {}; - } - }; - - TOML_PURE_GETTER - TOML_ATTR(nonnull) - bool is_ascii(const char* str, size_t len) noexcept; + ; + } + + TOML_CONST_GETTER + constexpr bool is_value_terminator(char32_t c) noexcept { + return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#'; + } + + TOML_CONST_GETTER + constexpr bool is_control_character(char c) noexcept { + return c <= '\u001F' || c == '\u007F'; + } + + TOML_CONST_GETTER + constexpr bool is_control_character(char32_t c) noexcept { + return c <= U'\u001F' || c == U'\u007F'; + } + + TOML_CONST_GETTER + constexpr bool is_nontab_control_character(char32_t c) noexcept { + return c <= U'\u0008' || (c >= U'\u000A' && c <= U'\u001F') || c == U'\u007F'; + } + + TOML_CONST_GETTER + constexpr bool is_unicode_surrogate(char32_t c) noexcept { + return c >= 0xD800u && c <= 0xDFFF; + } + + struct utf8_decoder { + // utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> + + uint_least32_t state{}; + char32_t codepoint{}; + + static constexpr uint8_t state_table[]{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, + 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + + 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, + 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, + 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, + 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}; + + TOML_PURE_INLINE_GETTER + constexpr bool error() const noexcept { return state == uint_least32_t{12u}; } + + TOML_PURE_INLINE_GETTER + constexpr bool has_code_point() const noexcept { return state == uint_least32_t{}; } + + TOML_PURE_INLINE_GETTER + constexpr bool needs_more_input() const noexcept { return !has_code_point() && !error(); } + + constexpr void operator()(uint8_t byte) noexcept { + TOML_ASSERT_ASSUME(!error()); + + const auto type = state_table[byte]; + + codepoint = static_cast<char32_t>(has_code_point() + ? (uint_least32_t{255u} >> type) & byte + : (byte & uint_least32_t{63u}) | + (static_cast<uint_least32_t>(codepoint) << 6)); + + state = state_table[state + uint_least32_t{256u} + type]; + } + + TOML_ALWAYS_INLINE + constexpr void operator()(char c) noexcept { operator()(static_cast<uint8_t>(c)); } + + TOML_ALWAYS_INLINE + constexpr void reset() noexcept { state = {}; } + }; + + TOML_PURE_GETTER + TOML_ATTR(nonnull) + bool is_ascii(const char* str, size_t len) noexcept; } TOML_IMPL_NAMESPACE_END; |
