diff options
| author | Amlal El Mahrouss <amlal.elmahrouss@icloud.com> | 2023-12-30 23:39:37 +0100 |
|---|---|---|
| committer | Amlal El Mahrouss <amlal.elmahrouss@icloud.com> | 2023-12-30 23:39:37 +0100 |
| commit | 263915832993dd12beee10e204f9ebcc6c786ed2 (patch) | |
| tree | 862e51208a99c35746e574a76564a4532b3a4a49 /CompilerDriver/cc2/source | |
Meta: initial commit of WestCo optimized toolchain.
Signed-off-by: Amlal El Mahrouss <amlal.elmahrouss@icloud.com>
Diffstat (limited to 'CompilerDriver/cc2/source')
| -rw-r--r-- | CompilerDriver/cc2/source/build.info | 1 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/common.h | 968 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/cpp2util.h | 1 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/cppfront.cpp | 115 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/io.h | 1079 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/lex.h | 1989 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/parse.h | 9263 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/reflect.h | 1965 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/reflect.h2 | 1447 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/sema.h | 1892 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/to_cpp1.h | 6750 | ||||
| -rw-r--r-- | CompilerDriver/cc2/source/version.info | 1 |
12 files changed, 25471 insertions, 0 deletions
diff --git a/CompilerDriver/cc2/source/build.info b/CompilerDriver/cc2/source/build.info new file mode 100644 index 0000000..1d47385 --- /dev/null +++ b/CompilerDriver/cc2/source/build.info @@ -0,0 +1 @@ +"8C20:1314"
\ No newline at end of file diff --git a/CompilerDriver/cc2/source/common.h b/CompilerDriver/cc2/source/common.h new file mode 100644 index 0000000..fe0301c --- /dev/null +++ b/CompilerDriver/cc2/source/common.h @@ -0,0 +1,968 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +#ifdef _MSC_VER +#pragma warning(disable: 4456) +#endif + +#include "cpp2util.h" + + +//=========================================================================== +// Common types +//=========================================================================== + +#ifndef CPP2_COMMON_H +#define CPP2_COMMON_H + +#include <string> +#include <string_view> +#include <vector> +#include <cstdint> +#include <cctype> +#include <cassert> +#include <iomanip> +#include <compare> +#include <algorithm> +#include <unordered_map> + +namespace cpp2 { + +//----------------------------------------------------------------------- +// +// source_line: represents a source code line +// +//----------------------------------------------------------------------- +// +struct source_line +{ + std::string text; + + enum class category { empty, preprocessor, comment, import, cpp1, cpp2, rawstring }; + category cat; + + bool all_tokens_are_densely_spaced = true; // to be overridden in lexing if they're not + + source_line( + std::string_view t = {}, + category c = category::empty + ) + : text{t} + , cat{c} + { } + + auto indent() const + -> int + { + return + std::find_if_not( text.begin(), text.end(), 
&isspace ) + - text.begin(); + } + + auto prefix() const + -> std::string + { + switch (cat) { + break;case category::empty: return "/* */ "; + break;case category::preprocessor: return "/* # */ "; + break;case category::comment: return "/* / */ "; + break;case category::import: return "/* i */ "; + break;case category::cpp1: return "/* 1 */ "; + break;case category::cpp2: return "/* 2 */ "; + break;case category::rawstring: return "/* R */ "; + break;default: assert(!"illegal category"); abort(); + } + } +}; + + +using lineno_t = int32_t; +using colno_t = int32_t; // not int16_t... encountered >80,000 char line during testing + +struct source_position +{ + lineno_t lineno; // one-based offset into program source + colno_t colno; // one-based offset into line + + source_position(lineno_t l = 1, colno_t c = 1 ) + : lineno{ l }, colno{ c } + { + } + + auto operator<=>(source_position const&) const = default; + + auto to_string() const + -> std::string + { + return "(" + std::to_string(lineno) + "," + std::to_string(colno) + ")"; + } +}; + +struct comment +{ + enum class comment_kind { line_comment = 0, stream_comment }; + + comment_kind kind; + source_position start; + source_position end; + std::string text; + + mutable bool dbg_was_printed = false; +}; + +struct string_parts { + struct cpp_code { std::string text; }; + struct raw_string { std::string text; }; + enum adds_sequences { no_ends = 0, on_the_beginning = 1, on_the_end = 2, on_both_ends = 3 }; + + string_parts(const std::string& beginseq, + const std::string& endseq, + adds_sequences strateg) + : begin_seq{beginseq} + , end_seq{endseq} + , strategy{strateg} + { + if (!(strategy & on_the_beginning)) { + parts.push_back(raw_string{""}); + } + } + + void add_code(const std::string& text) { parts.push_back(cpp_code{text});} + void add_string(const std::string& text) { parts.push_back(raw_string{text});} + void add_string(const std::string_view& text) { parts.push_back(raw_string{std::string(text)});} + + void 
clear() { parts.clear(); } + + auto generate() const -> std::string { + + if (parts.empty()) { + return (strategy & on_the_beginning ? begin_seq : std::string{}) + + (strategy & on_the_end ? end_seq : std::string{}); + } + + auto result = std::visit(begin_visit{begin_seq, strategy}, + parts.front()); + + if (std::ssize(parts) > 1) { + auto it1 = parts.cbegin(); + auto it2 = parts.cbegin()+1; + for(;it2 != parts.cend(); ++it1, ++it2) { + result += std::visit(generator_visit{begin_seq, end_seq}, *it1, *it2); + } + } + + if (!(strategy & on_the_end)) { + result += std::visit([this](const auto& lhs) { + return generator_visit{begin_seq, end_seq}(lhs, raw_string{""}); + }, parts.back()); + } + + result += std::visit(end_visit{end_seq, strategy}, parts.back()); + + return result; + } + + auto is_expanded() const -> bool { + for (const auto& p : parts) { + if (std::holds_alternative<cpp_code>(p)) { + return true; + } + } + return false; + } + +private: + std::string begin_seq; + std::string end_seq; + adds_sequences strategy; + std::vector<std::variant<raw_string, cpp_code>> parts; + + struct begin_visit { + std::string begin_seq; + adds_sequences strategy; + + auto operator()(const raw_string& part) const -> std::string { + return (strategy & on_the_beginning ? begin_seq : "") + part.text; + } + auto operator()(const cpp_code& part) const -> std::string { + return part.text; + } + }; + + struct end_visit { + std::string end_seq; + adds_sequences strategy; + auto operator()(const raw_string&) const -> std::string { + return strategy & on_the_end ? 
end_seq : ""; + } + auto operator()(const cpp_code&) const -> std::string { + return {}; + } + }; + + struct generator_visit { + std::string begin_seq; + std::string end_seq; + + auto operator()(const raw_string&, const cpp_code& part ) const -> std::string { + return end_seq + " + " + part.text; + } + auto operator()(const cpp_code&, const raw_string& part ) const -> std::string { + return " + " + begin_seq + part.text; + } + auto operator()(const raw_string&, const raw_string& part ) const -> std::string { + return part.text; + } + auto operator()(const cpp_code&, const cpp_code& part ) const -> std::string { + return " + " + part.text; + } + }; +}; + +struct raw_string +{ + source_position start; + std::string text; + std::string opening_seq; + std::string closing_seq; + bool should_interpolate = false; +}; + +struct multiline_raw_string +{ + std::string text; + source_position end = {0, 0}; +}; + +//----------------------------------------------------------------------- +// +// error: represents a user-readable error message +// +//----------------------------------------------------------------------- +// +struct error_entry +{ + source_position where; + std::string msg; + bool internal = false; + bool fallback = false; // only emit this message if there was nothing better + + error_entry( + source_position w, + std::string_view m, + bool i = false, + bool f = false + ) + : where{w} + , msg{m} + , internal{i} + , fallback{f} + { } + + auto operator==(error_entry const& that) + -> bool + { + return + where == that.where + && msg == that.msg + ; + } + + auto print(auto& o, std::string const& file) const + -> void; +}; + + +//----------------------------------------------------------------------- +// +// Digit classification, with '\'' digit separators +// +//----------------------------------------------------------------------- +// + +//G binary-digit: +//G one of '0' '1' +//G +auto is_binary_digit(char c) + -> bool +{ + return + c == '0' + || c == '1' + ; +} + 
+//G digit: one of +//G binary-digit +//G one of '2' '3' '4' '5' '6' '7' '8' '9' +//G +auto is_digit(char c) + -> bool +{ + return isdigit(c); +} + +//G hexadecimal-digit: +//G digit +//G one of 'A' 'B' 'C' 'D' 'E' 'F' +//G +auto is_hexadecimal_digit(char c) + -> bool +{ + return isxdigit(c); +} + +//G nondigit: +//G one of 'a'..'z' +//G one of 'A'..'Z' +//G _ +//G +auto is_nondigit(char c) + -> bool +{ + return + isalpha(c) + || c == '_' + ; +}; + +//G identifier-start: +//G nondigit +//G +auto is_identifier_start(char c) + -> bool +{ + return is_nondigit(c); +} + +//G identifier-continue: +//G digit +//G nondigit +//G +auto is_identifier_continue(char c) + -> bool +{ + return + is_digit(c) + || is_nondigit(c) + ; +} + +//G identifier: +//G '__identifier__' keyword [Note: without whitespace before the keyword] +//G identifier-start +//G identifier identifier-continue +//G 'operator' operator +//G +auto starts_with_identifier(std::string_view s) + -> int +{ + if (is_identifier_start(s[0])) { + auto j = 1; + while ( + j < std::ssize(s) + && is_identifier_continue(s[j]) + ) + { + ++j; + } + return j; + } + return 0; +}; + + +// Helper to allow one of the above or a digit separator +// Example: is_separator_or( is_binary_digit (c) ) +// +auto is_separator_or(auto pred, char c) + -> bool +{ + return + c == '\'' + || pred(c) + ; +} + + +// Bool to string +// +template<typename T> + requires std::is_same_v<T, std::string> +auto _as(bool b) + -> T +{ + return b ? 
"true" : "false"; +} + + +// Explicit cast +// +template<typename T> +auto _as(auto x) + -> T +{ + return T(x); +} + + +// String path prefix from filename +// +auto strip_path(std::string const& file) + -> std::string +{ + auto i = std::ssize(file)-1; + while ( + i >= 0 + && file[i] != '\\' + && file[i] != '/' + ) + { + --i; + } + return {file, _as<size_t>(i+1)}; +} + + +//----------------------------------------------------------------------- +// +// Misc helpers +// +//----------------------------------------------------------------------- +// +auto replace_all(std::string& s, std::string_view what, std::string_view with) +{ + for ( + std::string::size_type pos{}; + s.npos != (pos = s.find(what.data(), pos, what.length())); + pos += with.length() + ) + { + s.replace(pos, what.length(), with.data(), with.length()); + } + return s; +} + + +auto to_upper(char c) + -> char +{ + // C toupper is only not-UB in [0,127] and returns the wrong type, + // so wrap the range check and the type cast here in one place... 
+ // note the 126 (not 127) is intentional to avoid a GCC warning + if (0 <= c && c <= 126) { return static_cast<char>(std::toupper(c)); } + // else + return c; +} + + +auto to_upper_and_underbar(std::string_view s) + -> std::string +{ + auto ret = std::string{s}; + for (char& c : ret) { + if (std::isalnum(c)) { c = to_upper(c); } + else { c = '_'; } + } + return ret; +} + + +auto is_empty_or_a_decimal_number(std::string_view s) + -> bool +{ + auto size = std::ssize(s); + if (size == 0) { return true; } + + auto i = 0; + while (i < size && isspace(s[i]) ) { ++i; } + while (i < size && isdigit(s[i]) ) { ++i; } + while (i < size && isspace(s[i]) ) { ++i; } + return i == size; +} + + +auto starts_with( + std::string const& s, + std::string_view sv +) + -> bool +{ + return std::string_view(s).starts_with(sv); +} + + +auto contains( + auto const& range, + auto const& value +) + -> bool +{ + return std::find( + range.begin(), + range.end(), + value + ) + != range.end(); +} + +auto contains( + std::string const& s, + auto const& value +) + -> bool +{ + return s.find(value) != s.npos; +} + + +// In keep trying to write string+string_view, and it ought to Just Work without +// the current workarounds. Not having that is a minor impediment to using safe +// and efficient string_views, which we should be encouraging. So for my own use +// and to remove that minor impediment to writing safe and efficient code, I'm +// just going to add this until we get P2591 in C++26(?) 
-- See: wg21.link/p2591 +// +template<class charT, class traits, class Allocator> +[[nodiscard]] constexpr auto operator+( + std::basic_string<charT, traits, Allocator> lhs, + std::type_identity_t<std::basic_string_view<charT, traits>> rhs + ) + -> std::basic_string<charT, traits, Allocator> +{ + return lhs.append(rhs); +} + +template<class charT, class traits, class Allocator> +[[nodiscard]] constexpr auto operator+( + std::type_identity_t<std::basic_string_view<charT, traits>> lhs, + std::basic_string<charT, traits, Allocator> rhs + ) + -> std::basic_string<charT, traits, Allocator> +{ + return rhs.insert(0, lhs); +} + + +//----------------------------------------------------------------------- +// +// Command line handling +// +//----------------------------------------------------------------------- +// + +class cmdline_processor +{ + bool help_requested = false; + + struct arg + { + int pos; + std::string text; + + arg(int p, char* t) : pos{p}, text{t} { } + }; + std::vector<arg> args; + + using callback0 = void (*)(); + using callback1 = void (*)(std::string const&); + struct flag + { + int group = 0; + std::string name; + int unique_prefix = 0; + std::string description; + callback0 handler0; + callback1 handler1; + std::string synonym; + bool opt_out; + + flag(int g, std::string_view n, std::string_view d, callback0 h0, callback1 h1, std::string_view s, bool o) + : group{g}, name{n}, description{d}, handler0{h0}, handler1{h1}, synonym{s}, opt_out{o} + { } + }; + std::vector<flag> flags; + int max_flag_length = 0; + + std::unordered_map<int, std::string> labels = { + { 2, "Additional dynamic safety checks and contract information" }, + { 4, "Support for constrained target environments" }, + { 9, "Other options" } + }; + + // Define this in the main .cpp to avoid bringing <iostream> into the headers, + // so that we can't accidentally start depending on iostreams in the compiler body + static auto print(std::string_view, int width = 0) + -> void; + +public: + 
auto process_flags() + -> void + { + constexpr auto processed = -1; + + // Calculate the unique prefixes + for (auto flag1 = flags.begin(); flag1 != flags.end(); ++flag1) { + for (auto flag2 = flag1+1; flag2 != flags.end(); ++flag2) { + int i = 0; + while ( + i < std::ssize(flag1->name) + && i < std::ssize(flag2->name) + && flag1->name[i] != ' ' + && flag2->name[i] != ' ' + && flag1->name[i] == flag2->name[i] + ) + { + ++i; + } + // Record that we found the unique prefix must be at least this long + flag1->unique_prefix = std::max( flag1->unique_prefix, i+1 ); + flag2->unique_prefix = std::max( flag2->unique_prefix, i+1 ); + } + } + + // Look for matches + for (auto arg = args.begin(); arg != args.end(); ++arg) + { + // The arg should never be empty, but we're going to do a [0] + // subscript next so we should either check or assert + if (arg->text.empty()) { + continue; + } + + // Provide a way to ignore the rest of the command line + // for the purpose of looking for switches + if (arg->text == "--") { + arg->pos = processed; + break; + } + + for (auto& flag : flags) { + auto length_to_match = std::max(flag.unique_prefix, _as<int>(arg->text.length())-1); + if ( + flag.opt_out + && arg->text.ends_with("-") + ) + { + length_to_match = std::max(flag.unique_prefix, _as<int>(arg->text.length())-2); + } + + // Allow a switch to start with either - or / + if (arg->text.starts_with("-" + flag.name.substr(0, length_to_match)) + || arg->text.starts_with("/" + flag.name.substr(0, length_to_match)) + || arg->text == "-" + flag.synonym + || arg->text == "/" + flag.synonym + ) + { + assert(flag.handler0 || flag.handler1); + + // If this is a standalone switch, just process it + if (flag.handler0) { + flag.handler0(); + } + + // Else + else { + // If this is a switch that could be suffixed with "-" to opt out + if (flag.opt_out) { + flag.handler1( arg->text.ends_with("-") ? 
"-" : "" ); + } + // Else this is a switch that takes the next arg as its value, so pass that + else { + if (arg+1 == args.end()) { + print("Missing argument to option " + arg->text + " (try -help)\n"); + help_requested = true; + } + arg->pos = processed; + ++arg; // move to next argument, which is the argument to this switch + flag.handler1(arg->text); + } + } + + arg->pos = processed; + break; + } + } + } + + std::erase_if( args, [=](auto& arg){ return arg.pos == processed; } ); + } + + auto print_help() + -> void + { + help_requested = true; + + std::sort( + flags.begin(), + flags.end(), + [](auto& a, auto& b){ return a.group < b.group || (a.group == b.group && a.name < b.name); } + ); + + print("\nUsage: cppfront [options] file ...\n\nOptions:\n"); + int last_group = -1; + for (auto& flag : flags) { + // Skip hidden flags + if (flag.name.front() == '_') { + continue; + } + + if (last_group != flag.group) { + print("\n"); + last_group = flag.group; + if (!labels[flag.group].empty()) { + print( labels[flag.group] + "\n"); + } + } + print(" -"); + auto n = flag.name.substr(0, flag.unique_prefix); + if (flag.unique_prefix < std::ssize(flag.name)) { + auto name_length = _as<int>(std::min(flag.name.find(' '), flag.name.size())); + n += "["; + n += flag.name.substr(flag.unique_prefix, name_length - flag.unique_prefix); + n += "]"; + n += flag.name.substr(name_length); + } + if (flag.opt_out) { + n += "[-]"; + } + if (!flag.synonym.empty()) { + n += ", -" + flag.synonym; + } + print(n, max_flag_length + 3); + print(flag.description); + print("\n"); + } + } + + auto add_flag( + int group, + std::string_view name, + std::string_view description, + callback0 handler0, + callback1 handler1, + std::string_view synonym, + bool opt_out + ) + -> void + { + flags.emplace_back( group, name, description, handler0, handler1, synonym, opt_out ); + auto length = std::ssize(name); + if (opt_out) { length += 3; } // space to print "[-]" + if (max_flag_length < length) { + 
max_flag_length = length; + } + } + struct register_flag { + register_flag( + int group, + std::string_view name, + std::string_view description, + callback0 handler0, + callback1 handler1 = {}, + std::string_view synonym = {}, + bool opt_out = false + ); + }; + + auto set_args( + int argc, + char* argv[] + ) + -> void + { + for (auto i = 1; i < argc; ++i) { + args.emplace_back( i, argv[i] ); + } + } + + auto help_was_requested() + -> bool + { + return help_requested; + } + + auto arguments() + -> std::vector<arg>& + { + return args; + } + + // This is used only by the owner of the 'main' branch + // to generate stable build version strings + auto gen_version() + -> void + { + help_requested = true; + std::string_view a = __DATE__; + std::string_view b = __TIME__; + std::unordered_map<std::string_view, char> m = { {"Jan",'1'}, {"Feb",'2'}, {"Mar",'3'}, {"Apr",'4'}, {"May",'5'}, {"Jun",'6'}, {"Jul",'7'}, {"Aug",'8'}, {"Sep",'9'}, {"Oct",'A'}, {"Nov",'B'}, {"Dec",'C'} }; + + auto stamp = std::to_string(atoi(&a[9])-15); + stamp += m[a.substr(0, 3)]; + stamp += a.substr(4,2); + stamp += ":"; + stamp += b.substr(0,2); + stamp += b.substr(3,2); + for (auto& c : stamp) { if (c == ' ') { c = '0'; } } + print( "\"" + stamp + "\""); + } + + auto print_version() + -> void + { + help_requested = true; + print("\ncppfront compiler " + #include "version.info" + " Build " + #include "build.info" + ); + print("\nCopyright(c) Herb Sutter All rights reserved\n"); + print("\nSPDX-License-Identifier: CC-BY-NC-ND-4.0"); + print("\n No commercial use"); + print("\n No forks/derivatives"); + print("\n Note: This license emphasizes that this is a personal"); + print("\n experiment; it will be upgraded if that changes\n"); + print("\nAbsolutely no warranty - try at your own risk\n"); + } + +} cmdline; + +cmdline_processor::register_flag::register_flag( + int group, + std::string_view name, + std::string_view description, + callback0 handler0, + callback1 handler1, + std::string_view 
synonym, + bool opt_out +) +{ + cmdline.add_flag( group, name, description, handler0, handler1, synonym, opt_out ); +} + +static cmdline_processor::register_flag cmd_help ( + 0, + "help", + "Print help", + []{ cmdline.print_help(); }, + nullptr, + "?" +); + +static cmdline_processor::register_flag cmd_version( + 0, + "version", + "Print version information", + []{ cmdline.print_version(); } +); + +static cmdline_processor::register_flag cmd_gen_version( + 0, + "_gen_version", + "Generate version information", + []{ cmdline.gen_version(); } +); + +static auto flag_internal_debug = false; +static cmdline_processor::register_flag cmd_internal_debug( + 0, + "_debug", + "Generate internal debug instrumentation", + []{ flag_internal_debug = true; } +); + + +//----------------------------------------------------------------------- +// +// Internal instrumentation +// +//----------------------------------------------------------------------- +// + +class stackinstr +{ + struct entry + { + ptrdiff_t delta; + ptrdiff_t cumulative; + std::string_view func_name; + std::string_view file; + int line; + char* ptr; + + entry( + std::string_view n, + std::string_view f, + int l, + char* p + ) + : delta { entries.empty() ? 0 : std::abs(entries.back().ptr - p) } + , cumulative{ entries.empty() ? 0 : entries.back().cumulative + delta } + , func_name { n } + , file { f } + , line { l } + , ptr { p } + { } + }; + static std::vector<entry> entries; + static std::vector<entry> deepest; + static std::vector<entry> largest; + + static auto print(auto&& ee, std::string_view label) { + std::cout << "\n=== Stack debug information: " << label << " stack ===\n"; + for (auto& e: ee) + if (e.ptr) { + std::cout + << " " << std::setw(6) + << ((std::abs(e.delta) < 1000000)? std::to_string(e.delta) : "-----") << " " + << std::setw(8) + << ((std::abs(e.delta) < 1000000)? 
std::to_string(e.cumulative) : "-------") << " " + << e.func_name << " (" << e.file << ":" << e.line << ")\n"; + } + } + +public: + struct guard { + guard( std::string_view name, std::string_view file, int line, char* p ) { + if (flag_internal_debug) { + entries.emplace_back(name, file, line ,p); + if (ssize(deepest) < ssize(entries)) { + deepest = entries; + } + if (largest.empty() || largest.back().cumulative < entries.back().cumulative) { + largest = entries; + } + } + } + ~guard() { + if (flag_internal_debug) { + entries.pop_back(); + } + } + }; + + static auto print_entries() { print( entries, "Current" ); } + static auto print_deepest() { print( deepest, "Deepest" ); } + static auto print_largest() { print( largest, "Largest" ); } +}; + +std::vector<stackinstr::entry> stackinstr::entries; +std::vector<stackinstr::entry> stackinstr::deepest; +std::vector<stackinstr::entry> stackinstr::largest; + +#define STACKINSTR stackinstr::guard _s_guard{ __func__, __FILE__, __LINE__, reinterpret_cast<char*>(&_s_guard) }; + + +} + +#endif diff --git a/CompilerDriver/cc2/source/cpp2util.h b/CompilerDriver/cc2/source/cpp2util.h new file mode 100644 index 0000000..a98d92a --- /dev/null +++ b/CompilerDriver/cc2/source/cpp2util.h @@ -0,0 +1 @@ +#include "../include/cpp2util.h" diff --git a/CompilerDriver/cc2/source/cppfront.cpp b/CompilerDriver/cc2/source/cppfront.cpp new file mode 100644 index 0000000..989a6ad --- /dev/null +++ b/CompilerDriver/cc2/source/cppfront.cpp @@ -0,0 +1,115 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +//=========================================================================== +// main - driver +//=========================================================================== + +#include "to_cpp1.h" + +static auto flag_debug_output = false; +static cpp2::cmdline_processor::register_flag cmd_debug( + 9, + "debug", + "Emit compiler debug output", + []{ flag_debug_output = true; } +); + +auto main( + int argc, + char* argv[] +) + -> int +{ + using namespace cpp2; + + cmdline.set_args(argc, argv); + cmdline.process_flags(); + + if (cmdline.help_was_requested()) { + return EXIT_SUCCESS; + } + + if (cmdline.arguments().empty()) { + std::cerr << "cppfront: error: no input files (try -help)\n"; + return EXIT_FAILURE; + } + + // For each Cpp2 source file + int exit_status = EXIT_SUCCESS; + for (auto const& arg : cmdline.arguments()) + { + auto& out = flag_cpp1_filename != "stdout" ? std::cout : std::cerr; + + out << arg.text << "..."; + + // Load + lex + parse + sema + cppfront c(arg.text); + + // Generate Cpp1 (this may catch additional late errors) + auto count = c.lower_to_cpp1(); + + // If there were no errors, say so and generate Cpp1 + if (c.had_no_errors()) + { + if (!c.has_cpp1()) { + out << " ok (all Cpp2, passes safety checks)\n"; + } + else if (c.has_cpp2()) { + out << " ok (mixed Cpp1/Cpp2, Cpp2 code passes safety checks)\n"; + } + else { + out << " ok (all Cpp1)\n"; + } + + if (flag_verbose) { + out << " Cpp1: " << count.cpp1_lines << " line" << (count.cpp1_lines != 1 ? "s" : ""); + out << "\n Cpp2: " << count.cpp2_lines << " line" << (count.cpp2_lines != 1 ? 
"s" : ""); + auto total = count.cpp1_lines + count.cpp2_lines; + if (total > 0) { + out << " ("; + if (count.cpp2_lines / count.cpp1_lines > 25) { + out << std::setprecision(3) + << 100.0 * count.cpp2_lines / total; + } + else { + out << 100 * count.cpp2_lines / total; + } + out << "%)"; + } + } + + out << "\n"; + } + // Otherwise, print the errors + else + { + std::cerr << "\n"; + c.print_errors(); + std::cerr << "\n"; + exit_status = EXIT_FAILURE; + } + + // And, if requested, the debug information + if (flag_debug_output) { + c.debug_print(); + } + } + + if (flag_internal_debug) { + stackinstr::print_deepest(); + stackinstr::print_largest(); + } + + return exit_status; +} diff --git a/CompilerDriver/cc2/source/io.h b/CompilerDriver/cc2/source/io.h new file mode 100644 index 0000000..cf016a3 --- /dev/null +++ b/CompilerDriver/cc2/source/io.h @@ -0,0 +1,1079 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ + +//=========================================================================== +// Source loader +//=========================================================================== + +#ifndef CPP2_IO_H +#define CPP2_IO_H + +#include "common.h" +#include <fstream> +#include <ostream> +#include <iterator> +#include <cctype> + + +namespace cpp2 { + +//--------------------------------------------------------------------------- +// move_next: advances i as long as p(line[i]) is true or the end of line +// +// line current line being processed +// i current index +// p predicate to apply +// +auto move_next( + std::string const& line, + int& i, + auto p +) + -> bool +{ + while ( + i < ssize(line) + && line[i] + && p(line[i]) + ) + { + ++i; + } + return + i < ssize(line) + && line[i] + ; +} + + +//--------------------------------------------------------------------------- +// peek_first_non_whitespace: returns the first non-whitespace char in line +// +// line current line being processed +// +auto peek_first_non_whitespace(std::string const& line) + -> char +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return '\0'; + } + + return line[i]; +} + + +//--------------------------------------------------------------------------- +// is_preprocessor: returns whether this is a preprocessor line starting +// with #, and whether it will be followed by another preprocessor line +// +// line current line being processed +// first_line whether this is supposed to be the first line (start with #) +// +struct is_preprocessor_ret { + bool is_preprocessor; + bool has_continuation; +}; +auto is_preprocessor( + std::string const& line, + bool first_line +) + -> is_preprocessor_ret +{ + // see if the first non-whitespace is # + if ( + first_line + && peek_first_non_whitespace(line) != '#' + ) + { + return { false, false }; + } + + // return true iff last character is a \ continuation + return { true, line.back() == '\\' }; +} + + 
+//--------------------------------------------------------------------------- +// starts_with_import: returns whether the line starts with "import" +// +// line current line being processed +// +auto starts_with_import(std::string const& line) + -> bool +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return false; + } + + static constexpr auto import_keyword = std::string_view{"import"}; + + // the first token must begin with 'import' + if (!std::string_view(line).substr(i).starts_with(import_keyword)) { + return false; + } + + // and not be immediately followed by an _identifier-continue_ + return !is_identifier_continue(line[i + import_keyword.size()]); +} + + +//--------------------------------------------------------------------------- +// starts_with_whitespace_slash_slash: is this a "// comment" line +// +// line current line being processed +// +auto starts_with_whitespace_slash_slash(std::string const& line) + -> bool +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return false; + } + + return + i < ssize(line)-1 + && line[i] == '/' + && line[i+1] == '/' + ; +} + + +//--------------------------------------------------------------------------- +// starts_with_whitespace_slash_star_and_no_star_slash: is this a "/* comment" line +// +// line current line being processed +// +auto starts_with_whitespace_slash_star_and_no_star_slash(std::string const& line) + -> bool +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return false; + } + + if ( + i < ssize(line) - 1 + && line[i] == '/' + && line[i + 1] == '*' + ) + { + return line.find("*/", i) == std::string::npos; + } + else { + return false; + } +} + + +//--------------------------------------------------------------------------- +// starts_with_operator: returns whether the line starts with the string "operator" +// followed by the symbols of an operator +// 
+// line current line being processed +// +auto starts_with_operator(std::string_view s) + -> int +{ + if (s.starts_with("operator")) + { + auto j = 8; + + // skip any spaces + while ( + j < std::ssize(s) + && isspace(s[j]) + ) + { + ++j; + } + if (j >= std::ssize(s)) { + return 0; + } + + auto c1 = [&]{ if (j < std::ssize(s)) { return s[j ]; } return '\0'; }(); + auto c2 = [&]{ if (j+1 < std::ssize(s)) { return s[j+1]; } return '\0'; }(); + auto c3 = [&]{ if (j+2 < std::ssize(s)) { return s[j+2]; } return '\0'; }(); + + switch (c1) + { + // /= / + // == = + // ! != + // *= * + // %= % + // ^= ^ + // ~= ~ + break;case '/': + case '=': + case '!': + case '*': + case '%': + case '^': + case '~': + if (c2 == '=') { return j+2; } + return j+1; + + // ++ += + + break;case '+': + if (c2 == '=' || c2 == '+') { return j+2; } + return j+1; + + // -- -= -> - + break;case '-': + if (c2 == '=' || c2 == '-' || c2 == '>') { return j+2; } + return j+1; + + // ||= || |= | + // &&= && &= & + break;case '|': + case '&': + if (c2 == c1 && c3 == '=') { return j+3; } + if (c2 == c1 || c2 == '=') { return j+2; } + return j+1; + + // >>= >> >= > + break;case '>': + if (c2 == '>' && c3 == '=') { return j + 3; } + if (c2 == '>' || c2 == '=') { return j + 2; } + return j+1; + + // <<= << <=> <= < + break;case '<': + if (c2 == '<' && c3 == '=') { return j + 3; } + if (c2 == '=' && c3 == '>') { return j + 3; } + if (c2 == '<' || c2 == '=') { return j + 2; } + return j+1; + + break;default: + ; + } + } + + return 0; +} + +//--------------------------------------------------------------------------- +// starts_with_identifier_colon: returns whether the line starts with an +// identifier followed by one colon (not ::) (possibly preceded by an access specifier) +// +// line current line being processed +// +auto starts_with_identifier_colon(std::string const& line) + -> bool +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return false; + } + + 
// see if it's an access-specifier + auto s = std::string_view( &line[i], std::ssize(line) - i ); + auto j = 0; + assert (!isspace(s[j])); + if (s.starts_with("public")) { + j += 6; + } + else if (s.starts_with("protected")) { + j += 9; + } + else if (s.starts_with("private")) { + j += 7; + } + while ( + j < std::ssize(s) + && isspace(s[j]) + ) + { + ++j; + } + s.remove_prefix(j); + i += j; + + // see if it's an "operator @" name + j = starts_with_operator(s); + // else see if it's a single identifier + if (j == 0) { + j = starts_with_identifier(s); + } + // if it's neither, bail + if (j == 0) { + return false; + } + i += j; + + if (!move_next(line, i, isalnum)) { + return false; + } + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return false; + } + + // it's Cpp2 iff what's here is : not followed by another : + // (e.g., not a Cpp1 "using ::something") + assert (i < ssize(line)); + return + line[i] == ':' + && (i == ssize(line)-1 || line[i+1] != ':') + ; +} + + +//--------------------------------------------------------------------------- +// braces_tracker: to track brace depth +// +// Normally we don't emit diagnostics for Cpp1 code, but we do for a +// brace mismatch since we're relying on balanced {()} to find Cpp2 code +// +class braces_tracker +{ + // to track preprocessor #if brace depth and brace counts + // + class pre_if_depth_info + { + int if_net_braces = 0; + bool found_else = false; + int else_net_braces = 0; + + public: + auto found_open_brace() -> void { + if (!found_else) { ++if_net_braces; } + else { ++else_net_braces; } + } + + auto found_close_brace() -> void { + if (!found_else) { --if_net_braces; } + else { --else_net_braces; } + } + + auto found_preprocessor_else() -> void { + assert (!found_else); + found_else = true; + } + + // If the "if" and "else" branches opened/closed the same net number + // of unbalanced braces, they were double-counted in the brace + // matching and to try to keep going we can 
apply this adjustment + auto braces_to_ignore() -> int { + if ( + if_net_braces >= 0 + && if_net_braces == else_net_braces + ) + { + return if_net_braces; + } + else { + return 0; + } + } + }; + std::vector<pre_if_depth_info> preprocessor = { {} }; // sentinel + char current_open_type = ' '; + std::vector<lineno_t> open_braces; + std::vector<error_entry>& errors; + +public: + braces_tracker( std::vector<error_entry>& errors ) + : errors{errors} + { } + + // --- Brace matching functions - { and }, or ( and ) + + auto found_open_brace(lineno_t lineno, char brace) -> void { + assert(std::ssize(preprocessor) > 0); + if (open_braces.empty()) { + current_open_type = brace; + } + if (current_open_type == brace) { + open_braces.push_back(lineno); + preprocessor.back().found_open_brace(); + } + } + + auto found_close_brace(source_position pos, char brace) -> void { + assert(std::ssize(preprocessor) > 0); + + if ( + (current_open_type == '{' && brace == '}') + || (current_open_type == '(' && brace == ')') + ) + { + if (std::ssize(open_braces) < 1) { + errors.emplace_back( + pos, + "closing } does not match a prior {" + ); + } + else { + open_braces.pop_back(); + } + + preprocessor.back().found_close_brace(); + } + } + + auto found_eof(source_position pos) const -> void { + // Emit diagnostic if braces didn't match + // + if (current_depth() != 0) { + std::string unmatched_brace_lines; + for (auto i = 0; i < std::ssize(open_braces); ++i) { + if (i > 0 && std::size(open_braces)>2) { unmatched_brace_lines += ","; }; + if (i > 0 && i == std::ssize(open_braces)-1) { unmatched_brace_lines += " and"; }; + unmatched_brace_lines += " " + std::to_string(open_braces[i]); + } + errors.emplace_back( + pos, + std::string("end of file reached with ") + + std::to_string(current_depth()) + + " missing } to match earlier { on line" + + (current_depth() > 1 ? 
"s" : "") + + unmatched_brace_lines + ); + } + } + + auto current_depth() const -> int { + return std::ssize(open_braces); + } + + // --- Preprocessor matching functions - #if/#else/#endif + + // Entering an #if + auto found_pre_if() -> void { + assert(std::ssize(preprocessor) > 0); + preprocessor.push_back({}); + } + + // Encountered an #else + auto found_pre_else() -> void { + assert(std::ssize(preprocessor) > 1); + preprocessor.back().found_preprocessor_else(); + } + + // Exiting an #endif + auto found_pre_endif() -> void { + assert(std::ssize(preprocessor) > 1); + + // If the #if/#else/#endif introduced the same net number of braces, + // then we will have recorded that number too many open braces, and + // braces_to_ignore() will be the positive number of those net open braces + // that this loop will now throw away + for (auto i = 0; i < preprocessor.back().braces_to_ignore(); ++i) { + found_close_brace( source_position{}, current_open_type == '{' ? '}' : ')' ); + } + + preprocessor.pop_back(); + } +}; + + +//--------------------------------------------------------------------------- +// starts_with_preprocessor_if_else_endif: the line starts with a preprocessor conditional +// +// line current line being processed +// +enum class preprocessor_conditional { + none = 0, pre_if, pre_else, pre_endif +}; +auto starts_with_preprocessor_if_else_endif( + std::string const& line +) + -> preprocessor_conditional +{ + auto i = 0; + + // find first non-whitespace character + if (!move_next(line, i, isspace)) { + return preprocessor_conditional::none; + } + + // if it's not #, this isn't an #if/#else/#endif + if (line[i] != '#') { + return preprocessor_conditional::none; + } + + // find next non-whitespace character + ++i; + if (!move_next(line, i, isspace)) { + return preprocessor_conditional::none; + } + + if (line.substr(i).starts_with("if")) { + return preprocessor_conditional::pre_if; + } + else if (line.substr(i).starts_with("else")) { + return 
preprocessor_conditional::pre_else; + } + else if (line.substr(i).starts_with("endif")) { + return preprocessor_conditional::pre_endif; + } + else { + return preprocessor_conditional::none; + } +} + + +//--------------------------------------------------------------------------- +// process_cpp_line: just enough to know what to skip over +// +// line current line being processed +// in_comment track whether we're in a comment +// in_string_literal track whether we're in a string literal +// +struct process_line_ret { + bool all_comment_line; + bool empty_line; + bool all_rawstring_line; +}; +auto process_cpp_line( + std::string const& line, + bool& in_comment, + bool& in_string_literal, + bool& in_raw_string_literal, + std::string& raw_string_closing_seq, + braces_tracker& braces, + lineno_t lineno +) + -> process_line_ret +{ + if ( + !in_comment + && !in_string_literal + && !in_raw_string_literal + ) + { + if (starts_with_whitespace_slash_slash(line)) { + return { true, false, false }; + } + else if (starts_with_whitespace_slash_star_and_no_star_slash(line)) { + in_comment = true; + return { true, false, false }; + } + } + + struct process_line_ret r { in_comment, true , in_raw_string_literal}; + + auto prev = ' '; + auto prev2 = ' '; + for (auto i = colno_t{0}; i < ssize(line); ++i) + { + // Local helper functions for readability + // Note: in_literal is for { and } and so doesn't have to work for escaped ' characters + // + auto peek = [&](int num) { return (i+num < std::ssize(line)) ? 
line[i+num] : '\0'; }; + auto in_literal = [&] { return in_string_literal || in_raw_string_literal || (prev == '\'' && peek(1) == '\''); }; + + // Process this source character + // + if (!isspace(line[i])) { + r.empty_line = false; + } + + if ( + in_comment + && !in_string_literal + && !in_raw_string_literal + ) + { + switch (line[i]) { + break;case '/': if (prev == '*') { in_comment = false; } + break;default: ; + } + } + else if (in_raw_string_literal) { + auto end_pos = line.find(raw_string_closing_seq, i); + if (end_pos == std::string::npos) { + return r; + } + in_raw_string_literal = false; + i = end_pos+raw_string_closing_seq.size()-1; + } + else { + r.all_comment_line = false; + r.all_rawstring_line = false; + switch (line[i]) { + break;case 'R': + if ( + !in_comment + && !in_string_literal + && !in_raw_string_literal + && peek(1) == '"' + ) + { + i+=2; + if (i < ssize(line) - 1) + { + if (auto paren_pos = line.find("(", i); + paren_pos != std::string::npos + ) + { + raw_string_closing_seq = ")"+line.substr(i, paren_pos-i)+"\""; + in_raw_string_literal = true; + } + } + } + + break;case '\"': + // If this isn't an escaped quote, toggle string literal state + if ( + !in_comment + && (prev != '\\' || prev2 == '\\') + && (in_string_literal || prev != '\'') + && !in_raw_string_literal + ) + { + in_string_literal = !in_string_literal; + } + + break;case '{': + if (!in_literal()) { + braces.found_open_brace(lineno, '{'); + } + + break;case '}': + if (!in_literal()) { + braces.found_close_brace(source_position(lineno, i), '}'); + } + + break;case '*': + if ( + !in_string_literal + && !in_raw_string_literal + && prev == '/' + ) + { + in_comment = true; + } + + break;case '/': + if ( + !in_string_literal + && !in_raw_string_literal + && prev == '/' + ) + { + in_comment = false; + return r; + } + + break;default: ; + } + } + + prev2 = prev; + prev = line[i]; + } + + return r; +} + + +//--------------------------------------------------------------------------- +// 
process_cpp2_line: to find the end of a Cpp2 definition +// - find first of ; and { +// - if ; we're done +// - if { find matching } +// - then there must be nothing else on the last line +// +// line current line being processed +// in_comment whether this line begins inside a multi-line comment +// +// Returns: whether additional lines should be inspected +// +auto process_cpp2_line( + std::string const& line, + bool& in_comment, + braces_tracker& braces, + lineno_t lineno, + std::vector<error_entry>& errors +) + -> bool +{ + auto found_end = false; + + auto prev = ' '; + auto prev2 = ' '; + auto in_string_literal = false; + auto in_char_literal = false; + + for (auto i = colno_t{0}; i < ssize(line); ++i) + { + if (in_comment) + { + switch (line[i]) { + break;case '/': if (prev == '*') { in_comment = false; } + break;default: ; + } + } + else if (in_string_literal) + { + switch (line[i]) { + break;case '"': if (prev != '\\' || prev2 == '\\') { in_string_literal = false; } + break;default: ; + } + } + else if (in_char_literal) + { + switch (line[i]) { + break;case '\'': if (prev != '\\' || prev2 == '\\') { in_char_literal = false; } + break;default: ; + } + } + else + { + switch (line[i]) + { + // For finding Cpp2 definition endings, count () as {} + break;case '{': + case '(': + braces.found_open_brace( lineno, line[i] ); + + break;case '}': + case ')': + braces.found_close_brace( source_position(lineno, i), line[i]); + if (braces.current_depth() < 1 && line[i] != ')') { + found_end = true; + } + + break;case ';': + if (braces.current_depth() < 1) { found_end = true; } + + break;case '*': + if (prev == '/') { + in_comment = true; + if (found_end) { + errors.emplace_back( + source_position(lineno, i), + std::string("alpha limitation:" + " after the closing ; or } of a definition, the rest" + " of the line cannot begin a /*...*/ comment") + ); + } + } + + break;case '/': + if (prev == '/') { in_comment = false; return found_end; } + + break;case '"': + if (prev != 
'\\' || prev2 == '\\') { in_string_literal = true; } + + break;case '\'': + if (prev != '\\' || prev2 == '\\') { + // Also check that this isn't a digit separator + in_char_literal = !is_hexadecimal_digit(prev); + } + + break;default: ; + } + } + + prev2 = prev; + prev = line[i]; + } + + if (in_char_literal) { + errors.emplace_back( + source_position(lineno, ssize(line)), + std::string("line ended before character literal was terminated") + ); + } + + return found_end; +} + + +//----------------------------------------------------------------------- +// +// source: Represents a program source file +// +//----------------------------------------------------------------------- +// +class source +{ + std::vector<error_entry>& errors; + std::vector<source_line> lines; + bool cpp1_found = false; + bool cpp2_found = false; + + static const int max_line_len = 90'000; + // do not reduce this - I encountered an 80,556-char + // line in real world code during testing + char buf[max_line_len]; + +public: + //----------------------------------------------------------------------- + // Constructor + // + // errors error list + // + source( + std::vector<error_entry>& errors_ + ) + : errors{ errors_ } + , lines( 1 ) // extra blank to avoid off-by-one everywhere + , buf{0} + { + } + + + //----------------------------------------------------------------------- + // has_cpp1: Returns true if this file has some Cpp1/preprocessor lines + // (note: import lines don't count toward Cpp1 or Cpp2) + // + auto has_cpp1() const -> bool { + return cpp1_found; + } + + + //----------------------------------------------------------------------- + // has_cpp2: Returns true if this file has some Cpp2 lines + // (note: import lines don't count toward Cpp1 or Cpp2) + // + auto has_cpp2() const -> bool { + return cpp2_found; + } + + + //----------------------------------------------------------------------- + // load: Read a line-by-line view of 'filename', preserving line breaks + // + // filename 
the source file to be loaded + // source program textual representation + // + auto load( + std::string const& filename + ) + -> bool + { + std::ifstream in{ filename }; + if (!in.is_open()) { + return false; + } + + auto in_comment = false; + auto in_string_literal = false; + auto in_raw_string_literal = false; + std::string raw_string_closing_seq; + + auto braces = braces_tracker(errors); + + auto add_preprocessor_line = [&] { + lines.push_back({ &buf[0], source_line::category::preprocessor }); + if (auto pre = starts_with_preprocessor_if_else_endif(lines.back().text); + pre != preprocessor_conditional::none + ) + { + switch (pre) { + break;case preprocessor_conditional::pre_if: + braces.found_pre_if(); + break;case preprocessor_conditional::pre_else: + braces.found_pre_else(); + break;case preprocessor_conditional::pre_endif: + braces.found_pre_endif(); + break;default: + assert(false); + } + } + }; + + while (in.getline(&buf[0], max_line_len)) { + + // Handle preprocessor source separately, they're outside the language + // + if (auto pre = is_preprocessor(buf, true); + pre.is_preprocessor + && !in_comment + && !in_raw_string_literal + ) + { + cpp1_found = true; + add_preprocessor_line(); + while ( + pre.has_continuation + && in.getline(&buf[0], max_line_len) + ) + { + add_preprocessor_line(); + pre = is_preprocessor(buf, false); + } + } + + else + { + lines.push_back({ &buf[0], source_line::category::cpp1 }); + + // Switch to cpp2 mode if we're not in a comment, not inside nested { }, + // and the line starts with "nonwhitespace :" but not "::" + // + if (!in_comment + && !in_raw_string_literal + && braces.current_depth() < 1 + && starts_with_identifier_colon(lines.back().text) + ) + { + cpp2_found= true; + + // Mark this line, and preceding comment/blank source, as cpp2 + lines.back().cat = source_line::category::cpp2; + if (std::ssize(lines) > 1) { + auto prev = --std::end(lines); + while ( + --prev != std::begin(lines) + && (prev->cat == 
source_line::category::empty + || prev->cat == source_line::category::comment) + ) + { + prev->cat = source_line::category::cpp2; + } + } + + // Find the end of the definition: + while ( + !process_cpp2_line( + lines.back().text, + in_comment, + braces, + std::ssize(lines)-1, + errors + ) + && in.getline(&buf[0], max_line_len) + ) + { + lines.push_back({ &buf[0], source_line::category::cpp2 }); + } + } + + // Else still in Cpp1 code, but could be a comment, empty, or import + // + else + { + if (starts_with_import(lines.back().text)) { + lines.back().cat = source_line::category::import; + } + else { + auto stats = process_cpp_line( + lines.back().text, + in_comment, + in_string_literal, + in_raw_string_literal, + raw_string_closing_seq, + braces, + std::ssize(lines) - 1 + ); + if (stats.all_comment_line) { + lines.back().cat = source_line::category::comment; + } + else if (stats.all_rawstring_line) { + lines.back().cat = source_line::category::rawstring; + } + else if (stats.empty_line) { + lines.back().cat = source_line::category::empty; + } + else { + cpp1_found = true; + } + } + } + + } + } + + // Because I encountered very long lines in real-world code during testing + // + if (in.gcount() >= max_line_len-1) + { + errors.emplace_back( + source_position(lineno_t(std::ssize(lines)), 0), + std::string("source line too long - length must be less than ") + + std::to_string(max_line_len) + ); + return false; + } + + // This shouldn't be possible, so check it anyway + // + if (!in.eof()) + { + errors.emplace_back( + source_position(lineno_t(std::ssize(lines)), 0), + std::string("unexpected error reading source lines - did not reach EOF"), + false, + true // a noisy fallback error + ); + return false; + } + + braces.found_eof( source_position(lineno_t(std::ssize(lines)), 0) ); + + return true; + } + + + //----------------------------------------------------------------------- + // get_lines: Access the source lines + // + auto get_lines() -> std::vector<source_line>& + 
{ + return lines; + } + + auto get_lines() const -> std::vector<source_line> const& + { + return lines; + } + + //----------------------------------------------------------------------- + // debug_print + // + auto debug_print(std::ostream& o) const -> void + { + for (auto lineno = 0; auto const& line : lines) { + // Skip dummy first entry + if (lineno > 0) { + if (line.all_tokens_are_densely_spaced) { + o << "+"; + } + else { + o << " "; + } + o << line.prefix() << line.text << '\n'; + } + ++lineno; + } + } + + // No copying + // + source(source const&) = delete; + source& operator=(source const&) = delete; + source(source&&) = delete; + source& operator=(source&&) = delete; +}; + +} + +#endif diff --git a/CompilerDriver/cc2/source/lex.h b/CompilerDriver/cc2/source/lex.h new file mode 100644 index 0000000..478fe3d --- /dev/null +++ b/CompilerDriver/cc2/source/lex.h @@ -0,0 +1,1989 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ + +//=========================================================================== +// Lexer +//=========================================================================== + +#ifndef CPP2_LEX_H +#define CPP2_LEX_H + +#include "io.h" +#include <map> +#include <climits> +#include <deque> +#include <cstring> + + +namespace cpp2 { + +//----------------------------------------------------------------------- +// +// lexeme: represents the type of a token +// +//----------------------------------------------------------------------- +// + +enum class lexeme : std::int8_t { + SlashEq, + Slash, + LeftShiftEq, + LeftShift, + Spaceship, + LessEq, + Less, + RightShiftEq, + RightShift, + GreaterEq, + Greater, + PlusPlus, + PlusEq, + Plus, + MinusMinus, + MinusEq, + Arrow, + Minus, + LogicalOrEq, + LogicalOr, + PipeEq, + Pipe, + LogicalAndEq, + LogicalAnd, + MultiplyEq, + Multiply, + ModuloEq, + Modulo, + AmpersandEq, + Ampersand, + CaretEq, + Caret, + TildeEq, + Tilde, + EqualComparison, + Assignment, + NotEqualComparison, + Not, + LeftBrace, + RightBrace, + LeftParen, + RightParen, + LeftBracket, + RightBracket, + Scope, + Colon, + Semicolon, + Comma, + Dot, + Ellipsis, + QuestionMark, + At, + Dollar, + FloatLiteral, + BinaryLiteral, + DecimalLiteral, + HexadecimalLiteral, + StringLiteral, + CharacterLiteral, + UserDefinedLiteralSuffix, + Keyword, + Cpp1MultiKeyword, + Cpp2FixedType, + Identifier, + None = 127 +}; + +auto is_literal(lexeme l) -> bool { + switch (l) { + break;case lexeme::FloatLiteral: + case lexeme::BinaryLiteral: + case lexeme::DecimalLiteral: + case lexeme::HexadecimalLiteral: + case lexeme::StringLiteral: + case lexeme::CharacterLiteral: return true; + break;default: return false; + } +} + +auto close_paren_type(lexeme l) + -> lexeme +{ + switch (l) { + break;case lexeme::LeftBrace: return lexeme::RightBrace; + break;case lexeme::LeftBracket: return lexeme::RightBracket; + break;case lexeme::LeftParen: return lexeme::RightParen; + break;default: return 
lexeme::None; + } +} + + +template<typename T> + requires std::is_same_v<T, std::string> +auto _as(lexeme l) + -> std::string +{ + switch (l) { + break;case lexeme::SlashEq: return "SlashEq"; + break;case lexeme::Slash: return "Slash"; + break;case lexeme::LeftShiftEq: return "LeftShiftEq"; + break;case lexeme::LeftShift: return "LeftShift"; + break;case lexeme::Spaceship: return "Spaceship"; + break;case lexeme::LessEq: return "LessEq"; + break;case lexeme::Less: return "Less"; + break;case lexeme::RightShiftEq: return "RightShiftEq"; + break;case lexeme::RightShift: return "RightShift"; + break;case lexeme::GreaterEq: return "GreaterEq"; + break;case lexeme::Greater: return "Greater"; + break;case lexeme::PlusPlus: return "PlusPlus"; + break;case lexeme::PlusEq: return "PlusEq"; + break;case lexeme::Plus: return "Plus"; + break;case lexeme::MinusMinus: return "MinusMinus"; + break;case lexeme::MinusEq: return "MinusEq"; + break;case lexeme::Arrow: return "Arrow"; + break;case lexeme::Minus: return "Minus"; + break;case lexeme::LogicalOrEq: return "LogicalOrEq"; + break;case lexeme::LogicalOr: return "LogicalOr"; + break;case lexeme::PipeEq: return "PipeEq"; + break;case lexeme::Pipe: return "Pipe"; + break;case lexeme::LogicalAndEq: return "LogicalAndEq"; + break;case lexeme::LogicalAnd: return "LogicalAnd"; + break;case lexeme::MultiplyEq: return "MultiplyEq"; + break;case lexeme::Multiply: return "Multiply"; + break;case lexeme::ModuloEq: return "ModuloEq"; + break;case lexeme::Modulo: return "Modulo"; + break;case lexeme::AmpersandEq: return "AmpersandEq"; + break;case lexeme::Ampersand: return "Ampersand"; + break;case lexeme::CaretEq: return "CaretEq"; + break;case lexeme::Caret: return "Caret"; + break;case lexeme::TildeEq: return "TildeEq"; + break;case lexeme::Tilde: return "Tilde"; + break;case lexeme::EqualComparison: return "EqualComparison"; + break;case lexeme::Assignment: return "Assignment"; + break;case lexeme::NotEqualComparison: return 
"NotEqualComparison"; + break;case lexeme::Not: return "Not"; + break;case lexeme::LeftBrace: return "LeftBrace"; + break;case lexeme::RightBrace: return "RightBrace"; + break;case lexeme::LeftParen: return "LeftParen"; + break;case lexeme::RightParen: return "RightParen"; + break;case lexeme::LeftBracket: return "LeftBracket"; + break;case lexeme::RightBracket: return "RightBracket"; + break;case lexeme::Scope: return "Scope"; + break;case lexeme::Colon: return "Colon"; + break;case lexeme::Semicolon: return "Semicolon"; + break;case lexeme::Comma: return "Comma"; + break;case lexeme::Dot: return "Dot"; + break;case lexeme::Ellipsis: return "Ellipsis"; + break;case lexeme::QuestionMark: return "QuestionMark"; + break;case lexeme::At: return "At"; + break;case lexeme::Dollar: return "Dollar"; + break;case lexeme::FloatLiteral: return "FloatLiteral"; + break;case lexeme::BinaryLiteral: return "BinaryLiteral"; + break;case lexeme::DecimalLiteral: return "DecimalLiteral"; + break;case lexeme::HexadecimalLiteral: return "HexadecimalLiteral"; + break;case lexeme::StringLiteral: return "StringLiteral"; + break;case lexeme::CharacterLiteral: return "CharacterLiteral"; + break;case lexeme::UserDefinedLiteralSuffix: return "UserDefinedLiteralSuffix"; + break;case lexeme::Keyword: return "Keyword"; + break;case lexeme::Cpp1MultiKeyword: return "Cpp1MultiKeyword"; + break;case lexeme::Cpp2FixedType: return "Cpp2FixedType"; + break;case lexeme::Identifier: return "Identifier"; + break;case lexeme::None: return "(NONE)"; + break;default: return "INTERNAL-ERROR"; + } +}; + + +auto is_operator(lexeme l) + -> bool +{ + return l <= lexeme::Not; +} + + +//----------------------------------------------------------------------- +// +// token: represents a single token +// +// Note: by reference, thge test into the program's source lines +// +//----------------------------------------------------------------------- +// +class token +{ +public: + token( + char const* start, + auto 
count, + source_position pos, + lexeme type + ) + : sv {start, unsafe_narrow<ulong>(count)} + , pos {pos} + , lex_type{type} + { + } + + token( + char const* sz, + source_position pos, + lexeme type + ) + : sv {sz} + , pos {pos} + , lex_type{type} + { + } + + auto as_string_view() const + -> std::string_view + { + assert (sv.data()); + return sv; + } + + operator std::string_view() const + { + return as_string_view(); + } + + auto operator== (token const& t) const + -> bool + { + return operator std::string_view() == t.operator std::string_view(); + } + + auto operator== (std::string_view s) const + -> bool + { + return s == this->operator std::string_view(); + } + + auto to_string() const + -> std::string + { + return std::string{sv}; + } + + friend auto operator<< (auto& o, token const& t) + -> auto& + { + return o << std::string_view(t); + } + + auto position_col_shift( colno_t offset ) + -> void + { + assert (pos.colno + offset > 0); + pos.colno += offset; + } + + auto position() const -> source_position { return pos; } + + auto length () const -> int { return sv.size(); } + + auto type () const -> lexeme { return lex_type; } + + auto set_type(lexeme l) -> void { lex_type = l; } + + auto visit(auto& v, int depth) const + -> void + { + v.start(*this, depth); + } + + auto remove_prefix_if(std::string_view prefix) { + if ( + sv.size() > prefix.size() + && sv.starts_with(prefix) + ) + { + sv.remove_prefix(prefix.size()); + pos.colno += prefix.size(); + } + } + +private: + std::string_view sv; + source_position pos; + lexeme lex_type; +}; + +static_assert (CHAR_BIT == 8); + + +auto labelized_position(token const* t) + -> std::string +{ + auto ret = std::string{}; + if (t) { + ret += + std::to_string(t->position().lineno) + + "_" + + std::to_string(t->position().colno); + } + return ret; +} + + +//----------------------------------------------------------------------- +// +// A StringLiteral could include captures +// +auto expand_string_literal( + std::string_view 
text, + std::vector<error_entry>& errors, + source_position src_pos +) + -> std::string +{ + auto const length = std::ssize(text); + + assert(length >= 2); + if (text.back() != '"') { + errors.emplace_back( + source_position( src_pos ), + "not a legal string literal", + false, + true // a noisy fallback error message + ); + return {}; + } + + auto pos = 0; + + // Skip prefix to first non-" character + while ( + pos < length + && text[pos] != '"' + ) + { + ++pos; + } + assert( + pos < length + && text[pos] == '"' + ); + ++pos; + auto current_start = pos; // the current offset before which the string has been added to ret + + auto parts = string_parts{std::string(text.substr(0, current_start)), // begin sequence ", U", u8" depends on the string type + "\"", // end sequence + string_parts::on_both_ends}; // add opening and closing sequence to generated string + + bool escape = false; + // Now we're on the first character of the string itself + for ( + ; + pos < length && !(!escape && text[pos] == '"'); + ++pos + ) + { + escape = (text[pos] == '\\' && !escape); + // Find the next )$ + if ( + text[pos] == '$' + && text[pos-1] == ')' + ) + { + // Scan back to find the matching ( + auto paren_depth = 1; + auto open = pos - 2; + + for( ; open > current_start; --open) + { + if (text[open] == ')') { + ++paren_depth; + } + else if (text[open] == '(') { + --paren_depth; + if (paren_depth == 0) { + break; + } + } + } + if (text[open] != '(') + { + errors.emplace_back( + source_position( src_pos.lineno, src_pos.colno + pos ), + "no matching ( for string interpolation ending in )$" + ); + return {}; + } + + // 'open' is now at the matching ( + + // Put the next non-empty non-interpolated chunk straight into ret + if (open != current_start) { + parts.add_string(text.substr(current_start, open - current_start)); + } + + // Then put interpolated chunk into ret + auto chunk = std::string{text.substr(open, pos - open)}; + { // unescape chunk string + auto last_it = std::remove_if( + 
std::begin(chunk), + std::end(chunk), + [escape = false, prev = ' '](const auto& e) mutable { + escape = !escape && prev != '\'' && e == '\\'; + prev = e; + return escape; + } + ); + chunk.erase(last_it, std::end(chunk)); + } + + // This chunk string is now in the form "(some_capture_text)", + // which might include a :formatter suffix like "(capture_text:formatter)" + + if (std::ssize(chunk) < 1) + { + errors.emplace_back( + source_position( src_pos.lineno, src_pos.colno + pos ), + "string interpolation must not be empty" + ); + return {}; + } + if (chunk.ends_with(':')) + { + errors.emplace_back( + source_position( src_pos.lineno, src_pos.colno + pos ), + "string interpolation ':' must be followed by a std::formatter specifier" + ); + return {}; + } + + // If there's a :formatter suffix, decorate it as: ,"{:formatter}" + if (auto colon = chunk.find_last_of(':'); + colon != chunk.npos + && chunk[colon-1] != ':' // ignore :: scope resolution + ) + { + chunk.insert(colon, ",\"{"); + chunk.insert(chunk.size()-1, "}\""); + } + + parts.add_code("cpp2::to_string" + chunk); + + current_start = pos+1; + } + } + + // Now we should be on the the final " closing the string + assert( + pos == length-1 + && text[pos] == '"' + ); + + // Put the final non-interpolated chunk straight into ret + if (current_start < std::ssize(text)-1) { + parts.add_string(text.substr(current_start, std::ssize(text)-current_start-1)); + } + + return parts.generate(); +} + +auto expand_raw_string_literal( + const std::string& opening_seq, + const std::string& closing_seq, + string_parts::adds_sequences closing_strategy, + std::string_view text, + std::vector<error_entry>& errors, + source_position src_pos +) + -> string_parts +{ + auto const length = std::ssize(text); + auto pos = 0; + auto current_start = pos; // the current offset before which the string has been added to ret + string_parts parts{opening_seq, closing_seq, closing_strategy}; + + // Now we're on the first character of the string 
itself + for ( ; pos < length; ++pos ) + { + // Find the next )$ + if (text[pos] == '$' && text[pos-1] == ')') + { + // Scan back to find the matching ( + auto paren_depth = 1; + auto open = pos - 2; + + for( ; open > current_start; --open) + { + if (text[open] == ')') { + ++paren_depth; + } + else if (text[open] == '(') { + --paren_depth; + if (paren_depth == 0) { + break; + } + } + } + if (text[open] != '(') + { + errors.emplace_back( + source_position( src_pos.lineno, src_pos.colno + pos ), + "no matching ( for string interpolation ending in )$" + ); + return parts; + } + + // 'open' is now at the matching ( + + // Put the next non-empty non-interpolated chunk straight into ret + if (open != current_start) { + parts.add_string(text.substr(current_start, open - current_start)); + } + // Then put interpolated chunk into ret + parts.add_code("cpp2::to_string" + std::string{text.substr(open, pos - open)}); + + current_start = pos+1; + } + } + + // Put the final non-interpolated chunk straight into ret + if (current_start < std::ssize(text)) { + parts.add_string(text.substr(current_start)); + } + + return parts; +} + +//----------------------------------------------------------------------- +// lex: Tokenize a single line while maintaining inter-line state +// +// mutable_line the line to be tokenized +// lineno the current line number +// in_comment are we currently in a comment +// current_comment the current partial comment +// current_comment_start the current comment's start position +// tokens the token list to add to +// comments the comment token list to add to +// errors the error message list to use for reporting problems +// raw_string_multiline the current optional raw_string state +// + +// A stable place to store additional text for source tokens that are merged +// into a whitespace-containing token (to merge the Cpp1 multi-token keywords) +// -- this isn't about tokens generated later, that's tokens::generated_tokens +static auto generated_text = 
std::deque<std::string>{}; +static auto generated_lines = std::deque<std::vector<source_line>>{}; + + +static auto multiline_raw_strings = std::deque<multiline_raw_string>{}; + +auto lex_line( + std::string& mutable_line, + int const lineno, + bool& in_comment, + std::string& current_comment, + source_position& current_comment_start, + std::vector<token>& tokens, + std::vector<comment>& comments, + std::vector<error_entry>& errors, + std::optional<raw_string>& raw_string_multiline +) + -> bool +{ + auto const& line = mutable_line; // most accesses will be const, so give that the nice name + + auto original_size = std::ssize(tokens); + + auto i = colno_t{0}; + + // Token merging helpers + // + auto merge_cpp1_multi_token_fundamental_type_names = [&] + { + // If the last token is a non-Cpp1MultiKeyword, we might be at the end + // of a sequence of Cpp1MultiKeyword tokens that need to be merged + + // First, check the last token... only proceed if it is NOT one of those + auto i = std::ssize(tokens)-1; + if ( + i < 0 + || tokens[i].type() == lexeme::Cpp1MultiKeyword + ) + { + return; + } + + // Next, check the two tokens before that... 
only proceed if they ARE those + --i; + if ( + i < 1 + || tokens[i].type() != lexeme::Cpp1MultiKeyword + || tokens[i-1].type() != lexeme::Cpp1MultiKeyword + ) + { + // If this is just one such token, changed its type to regular ::Keyword + if ( + i >= 0 + && tokens[i].type() == lexeme::Cpp1MultiKeyword + ) + { + tokens[i].set_type( lexeme::Keyword ); + } + return; + } + + // OK, we have found the end of a sequence of 1 or more Cpp1MultiKeywords, so + // replace them with a single synthesized token that contains all their text + // + // Note: It's intentional that this is a kind of token that can contain whitespace + + // Remember the last (non-Cpp1MultiKeyword) token so we can put it back + auto last_token = tokens.back(); + tokens.pop_back(); + + assert(tokens.back().type() == lexeme::Cpp1MultiKeyword); + auto pos = tokens.back().position(); + + auto num_merged_tokens = 0; + auto is_char = 0; + auto is_short = 0; + auto is_int = 0; + auto is_long = 0; + auto is_double = 0; + auto is_signed = 0; + auto is_unsigned = 0; + generated_text.push_back( "" ); + while( + !tokens.empty() + && tokens.back().type() == lexeme::Cpp1MultiKeyword + ) + { + auto text = tokens.back().to_string(); + if (text == "char" ) { ++is_char ; } + if (text == "short" ) { ++is_short ; } + if (text == "int" ) { ++is_int ; } + if (text == "long" ) { ++is_long ; } + if (text == "double" ) { ++is_double ; } + if (text == "signed" ) { ++is_signed ; } + if (text == "unsigned") { ++is_unsigned; } + + if (num_merged_tokens > 0) { + generated_text.back() = " " + generated_text.back(); + } + generated_text.back() = text + generated_text.back(); + pos = tokens.back().position(); + tokens.pop_back(); + ++num_merged_tokens; + } + + tokens.push_back({ + &generated_text.back()[0], + std::ssize(generated_text.back()), + pos, + lexeme::Keyword + }); + + if (num_merged_tokens > 1) + { + auto alt = std::string{}; + if (is_char && is_signed) { alt = "'i8' (usually best) or 'cpp2::_schar'"; } + else if (is_char && 
is_unsigned) { alt = "'u8' (usually best) or 'cpp2::_uchar'"; } + else if (is_short && !is_unsigned) { alt = "'short'" ; } + else if (is_short && is_unsigned) { alt = "'ushort'" ; } + else if (is_long == 1 && !is_unsigned) { alt = "'long'" ; } + else if (is_long == 1 && is_unsigned) { alt = "'ulong'" ; } + else if (is_long > 1 && !is_unsigned) { alt = "'longlong'" ; } + else if (is_long > 1 && is_unsigned) { alt = "'ulonglong'" ; } + else if (is_int && !is_unsigned) { alt = "'int'" ; } + else if (is_int && is_unsigned) { alt = "'uint'" ; } + else if (is_double && is_long) { alt = "'longdouble'" ; } + + if (std::ssize(alt) > 0) { + errors.emplace_back( + pos, + "'" + tokens.back().to_string() + "' - did you mean " + alt + "?" + ); + } + errors.emplace_back( + pos, + "'" + tokens.back().to_string() + "' is an old-style C/C++ multi-word keyword type\n" + " - most such types should be used only for interoperability with older code\n" + " - using those when you need them is fine, but name them with these short names instead:\n" + " short, ushort, int, uint, long, ulong, longlong, ulonglong, longdouble, _schar, _uchar\n" + " - see also cpp2util.h > \"Convenience names for integer types\"" + ); + } + + tokens.push_back(last_token); + }; + + auto merge_operator_function_names = [&] + { + auto i = std::ssize(tokens)-1; + + // If the third-to-last token is "operator", we may need to + // merge an "operator?" 
name into a single identifier token + + if ( + i >= 2 + && tokens[i-2] == "operator" + ) + { + // If the tokens after "operator" are ">" and without whitespace one of ">=" ">" "=" + if ( + tokens[i-1].type() == lexeme::Greater + && (tokens[i-1].position() == source_position{tokens[i].position().lineno, tokens[i].position().colno-1}) + && (tokens[i].type() == lexeme::GreaterEq || tokens[i].type() == lexeme::Greater || tokens[i].type() == lexeme::Assignment)) + { + // Merge all three tokens into an identifier + generated_text.push_back( "operator" + tokens[i-1].to_string() + tokens[i].to_string() ); + tokens.pop_back(); + tokens.pop_back(); + auto pos = tokens.back().position(); + tokens.pop_back(); + tokens.push_back({ + &generated_text.back()[0], + std::ssize(generated_text.back()), + pos, + lexeme::Identifier + }); + } + + // Else if token after "operator" is a single-token operator symbol + else if (is_operator(tokens[i-1].type())) + { + // Merge just "operator" + the symbol into an identifier, + generated_text.push_back( "operator" + tokens[i-1].to_string() ); + // and preserve the last token separately + auto last_token = tokens.back(); + + tokens.pop_back(); + tokens.pop_back(); + auto pos = tokens.back().position(); + tokens.pop_back(); + tokens.push_back({ + &generated_text.back()[0], + std::ssize(generated_text.back()), + pos, + lexeme::Identifier + }); + tokens.push_back(last_token); + } + + // Else if token after "operator" is a two-token operator symbol + else if ( + (tokens[i-1].type() == lexeme::LeftParen && tokens[i].type() == lexeme::RightParen) + || (tokens[i-1].type() == lexeme::LeftBracket && tokens[i].type() == lexeme::RightBracket) + ) + { + // Merge just "operator" + the symbols into an identifier, + generated_text.push_back( "operator" + tokens[i-1].to_string() + tokens[i].to_string() ); + + tokens.pop_back(); + tokens.pop_back(); + auto pos = tokens.back().position(); + tokens.pop_back(); + tokens.push_back({ + &generated_text.back()[0], + 
std::ssize(generated_text.back()), + pos, + lexeme::Identifier + }); + } + + } + }; + + + // Local helper functions for readability + // + auto peek = [&](int num) { + return + (i+num < std::ssize(line) && i+num >= 0) + ? line[i+num] + : '\0'; + }; + + auto store = [&](auto num, lexeme type) + { + tokens.push_back({ + &line[i], + num, + source_position(lineno, i + 1), + type + }); + i += num-1; + + merge_cpp1_multi_token_fundamental_type_names(); + merge_operator_function_names(); + }; + + + //----------------------------------------------------- + // These functions return the length of sequence if + // present at the current location, else 0 + + //G simple-escape-sequence: + //G '\' { any member of the basic character set except u, U, or x } + //G + auto peek_is_simple_escape_sequence = [&](int offset) + { + auto peek1 = peek(offset); + auto peek2 = peek(1 + offset); + if ( + peek1 == '\\' + && peek2 != 'u' + && peek2 != 'U' + && peek2 != 'x' + ) + { + return 2; + } + return 0; + }; + + //G hexadecimal-escape-sequence: + //G '\x' hexadecimal-digit + //G hexadecimal-escape-sequence hexadecimal-digit + //G + auto peek_is_hexadecimal_escape_sequence = [&](int offset) + { + if ( + peek( offset) == '\\' + && peek(1+offset) == 'x' + && is_hexadecimal_digit(peek(2+offset)) + ) + { + auto j = 3; + while ( + peek(j+offset) + && is_hexadecimal_digit(peek(j+offset)) + ) + { + ++j; + } + return j; + } + return 0; + }; + + //G universal-character-name: + //G '\u' hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit + //G '\U' hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit + //G + auto peek_is_universal_character_name = [&](colno_t offset) + { + if ( + peek(offset) == '\\' + && peek(1 + offset) == 'u' + ) + { + auto j = 2; + while ( + j <= 5 + && is_hexadecimal_digit(peek(j + offset)) + ) + { + ++j; + } + if (j == 6) { return j; } + errors.emplace_back( + 
source_position( lineno, i + offset ), + "invalid universal character name (\\u must" + " be followed by 4 hexadecimal digits)" + ); + } + if ( + peek(offset) == '\\' + && peek(1+offset) == 'U' + ) + { + auto j = 2; + while ( + j <= 9 + && is_hexadecimal_digit(peek(j+offset)) + ) + { + ++j; + } + if (j == 10) { return j; } + errors.emplace_back( + source_position(lineno, i+offset), + "invalid universal character name (\\U must" + " be followed by 8 hexadecimal digits)" + ); + } + return 0; + }; + + //G escape-sequence: + //G hexadecimal-escape-sequence + //G simple-escape-sequence + //G + auto peek_is_escape_sequence = [&](int offset) + { + if (auto h = peek_is_hexadecimal_escape_sequence(offset)) { return h; } + return peek_is_simple_escape_sequence(offset); + }; + + //G s-char: + //G universal-character-name + //G escape-sequence + //G basic-s-char + //G + //G basic-s-char: + //G any member of the basic source character set except '"' '\' or new-line + //G + //G c-char: + //G universal-character-name + //G escape-sequence + //G basic-c-char + //G + //G basic-c-char: + //G any member of the basic source character set except ''' '\' or new-line + //G + auto peek_is_sc_char = [&](int offset, char quote) + { + if (auto u = peek_is_universal_character_name(offset)) { + return u; + } + if (auto e = peek_is_escape_sequence(offset)) { + return e; + } + if ( + peek(offset) + && peek(offset) != quote + && peek(offset) != '\\' + ) + { + return 1; + } + return 0; + }; + + //G keyword: + //G any Cpp1-and-Cpp2 keyword + //G one of: 'import' 'module' 'export' 'is' 'as' + //G + auto do_is_keyword = [&](std::vector<std::string_view> const& r) { + auto remaining_line = std::string_view(line).substr(unsafe_narrow<std::size_t>(i)); + auto m = std::find_if(r.begin(), r.end(), [&](std::string_view s) { + return remaining_line.starts_with(s); + }); + if (m != r.end()) { + // If we matched and what's next is EOL or a non-identifier char, we matched! 
+ if ( + i+std::ssize(*m) == std::ssize(line) // EOL + || !is_identifier_continue(line[unsafe_narrow<std::size_t>(i)+std::size(*m)]) // non-identifier char + ) + { + return static_cast<int>(std::ssize(*m)); + } + } + return 0; + }; + + auto peek_is_keyword = [&] + { + // Cpp2 has a smaller set of the Cpp1 globally reserved keywords, but we continue to + // reserve all the ones Cpp1 has both for compatibility and to not give up a keyword + // Some keywords like "delete" and "union" are not in this list because we reject them elsewhere + // Cpp2 also adds a couple, notably "is" and "as" + static const auto keys = std::vector<std::string_view>{ + "alignas", "alignof", "asm", "as", "auto", + "bool", "break", + "case", "catch", "char16_t", "char32_t", "char8_t", "char", "co_await", "co_return", + "co_yield", "concept", "const_cast", "consteval", "constexpr", "constinit", "const", "continue", + "decltype", "default", "double", "do", "dynamic_cast", + "else", "enum", "explicit", "export", "extern", + "float", "for", "friend", + "goto", + "if", "import", "inline", "int", "is", + "long", + "module", "mutable", + "namespace", "noexcept", + "operator", + "private", "protected", "public", + "register", "reinterpret_cast", "requires", "return", + "short", "signed", "sizeof", "static_assert", "static_cast", "static", "switch", + "template", "this", "thread_local", "throws", "throw", "try", "typedef", "typeid", "typename", + "unsigned", "using", + "virtual", "void", "volatile", + "wchar_t", "while" + }; + + return do_is_keyword(keys); + }; + + auto peek_is_cpp2_fundamental_type_keyword = [&] + { + static const auto keys = std::vector<std::string_view>{ + "i8", "i16", "i32", "i64", "longdouble", "longlong", + "u8", "u16", "u32", "u64", "ulonglong", "ulong", "ushort", + "_schar", "_uchar" + }; + + return do_is_keyword(keys); + }; + + auto peek_is_cpp1_multi_token_fundamental_keyword = [&] + { + static const auto multi_keys = std::vector<std::string_view>{ + "char16_t", "char32_t", 
"char8_t", "char", + "double", "float", + "int", "long", "short", + "signed", "unsigned" + }; + return do_is_keyword(multi_keys); + }; + + auto reset_processing_of_the_line = [&]() { + // Redo processing of this whole line now that the string is expanded, + // which may have moved it in memory... move i back to the line start + // and _ = any tokens we already tokenized for this line + i = colno_t{-1}; + while ( + !tokens.empty() + && tokens.back().position().lineno == lineno + ) + { + tokens.pop_back(); + } + }; + + auto interpolate_raw_string = [&]( + const std::string& opening_seq, + const std::string& closing_seq, + string_parts::adds_sequences closing_strategy, + std::string_view part, + int pos_to_replace, + int size_to_replace + ) -> bool { + auto parts = expand_raw_string_literal(opening_seq, closing_seq, closing_strategy, part, errors, source_position(lineno, pos_to_replace + 1)); + auto new_part = parts.generate(); + mutable_line.replace( pos_to_replace, size_to_replace, new_part ); + i += std::ssize(new_part)-1; + + if (parts.is_expanded()) { + // raw string was expanded and we need to repeat the processing of this line + reset_processing_of_the_line(); + + // but skipping end of potential multiline raw string that ends on this line + if (!multiline_raw_strings.empty() && multiline_raw_strings.back().end.lineno == lineno) { + i = multiline_raw_strings.back().end.colno; + raw_string_multiline.reset(); + } else if (raw_string_multiline && raw_string_multiline->start.lineno == lineno) { + raw_string_multiline.reset(); + } + return true; + } + return false; + }; + + // + //----------------------------------------------------- + + for ( ; i < ssize(line); ++i) + { + auto peek1 = peek(1); + auto peek2 = peek(2); + auto peek3 = peek(3); + + //G encoding-prefix: one of + //G 'u8' 'u' 'uR' 'u8R' 'U' 'UR' 'L' 'LR' 'R' + //G + auto is_encoding_prefix_and = [&](char next) { + if (line[i] == next) { return 1; } // " + else if (line[i] == 'u') { + if (peek1 == next) { 
return 2; } // u" + else if (peek1 == '8' && peek2 == next) { return 3; } // u8" + else if (peek1 == 'R' && peek2 == next) { return 3; } // uR" + else if (peek1 == '8' && peek2 == 'R' && peek3 == next) { return 4; } // u8R" + } + else if (line[i] == 'U') { + if ( peek1 == next) { return 2; } // U" + else if (peek1 == 'R' && peek2 == next) { return 3; } // UR" + } + else if (line[i] == 'L') { + if ( peek1 == next ) { return 2; } // L" + else if (peek1 == 'R' && peek2 == next) { return 3; } // LR" + } + else if (line[i] == 'R' && peek1 == next) { return 2; } // R" + return 0; + }; + + // If we're currently in a multiline comment, + // the only thing to look for is the */ comment end + // + if (in_comment) { + switch (line[i]) { + // */ comment end + break;case '*': + if (peek1 == '/') { + current_comment += "*/"; + comments.push_back({ + comment::comment_kind::stream_comment, + current_comment_start, + source_position(lineno, i + 2), + current_comment + }); + in_comment = false; + ++i; + } + break;default: + current_comment += line[i]; + } + } + else if (raw_string_multiline) { + auto end_pos = line.find(raw_string_multiline.value().closing_seq, i); + auto part = line.substr(i, end_pos-i); + + if (const auto& rsm = raw_string_multiline.value(); rsm.should_interpolate) { + + auto closing_strategy = end_pos == line.npos ? string_parts::no_ends : string_parts::on_the_end; + auto size_to_replace = end_pos == line.npos ? 
std::ssize(line) - i : end_pos - i + std::ssize(rsm.closing_seq); + + if (interpolate_raw_string(rsm.opening_seq, rsm.closing_seq, closing_strategy, part, i, size_to_replace ) ) { + continue; + } + } + // raw string was not expanded + + raw_string_multiline.value().text += part; + if (end_pos == std::string::npos) { + raw_string_multiline.value().text += '\n'; + break; + } + + // here we know that we are dealing with finalized multiline raw string literal + // token needs to use multiline_raw_strings to store string that exists in multiple lines + raw_string_multiline.value().text += raw_string_multiline.value().closing_seq; + + // and position where multiline_raw_string ends (needed for reseting line parsing) + i = end_pos+std::ssize(raw_string_multiline.value().closing_seq)-1; + + const auto& text = raw_string_multiline.value().should_interpolate ? raw_string_multiline.value().text.substr(1) : raw_string_multiline.value().text; + multiline_raw_strings.emplace_back(multiline_raw_string{ text, {lineno, i} }); + + tokens.push_back({ + &multiline_raw_strings.back().text[0], + std::ssize(multiline_raw_strings.back().text), + raw_string_multiline.value().start, + lexeme::StringLiteral + }); + raw_string_multiline.reset(); + continue; + } + + // Otherwise, we will be at the start of a token, a comment, or whitespace + // + else { + //G token: + //G identifier + //G keyword + //G literal + //G operator-or-punctuator + //G + //G operator-or-punctuator: + //G operator + //G punctuator + //G + //G operator: one of + + switch (line[i]) { + + // .* ->* ? 
aren't currently used in Cpp2, and aren't needed + + // (we do need all the overloadable operators for Cpp1 compat, + // even if we may not keep their meanings for built-in types) + + // /* and // comment starts + //G '/=' '/' + break;case '/': + if (peek1 == '*') { + current_comment = "/*"; + current_comment_start = source_position(lineno, i+1); + in_comment = true; + ++i; + } + else if (peek1 == '/') { + comments.push_back({ + comment::comment_kind::line_comment, + {lineno, i}, + {lineno, _as<colno_t>(std::ssize(line))}, + std::string(&line[i], std::ssize(line) - i) + }); + in_comment = false; + goto END; + } + else if (peek1 == '=') { + store(2, lexeme::SlashEq); + } + else { + store(1, lexeme::Slash); + } + + //G '<<=' '<<' '<=>' '<=' '<' + break;case '<': + if (peek1 == '<') { + if (peek2 == '=') { store(3, lexeme::LeftShiftEq); } + else { store(2, lexeme::LeftShift); } + } + else if (peek1 == '=') { + if (peek2 == '>') { store(3, lexeme::Spaceship); } + else { store(2, lexeme::LessEq); } + } + else { store(1, lexeme::Less); } + + // Note: >> and >>= are not source tokens, they are synthesized from > > and > >= where legal + //G '>>=' '>>' '>=' '>' + break;case '>': + //--------------------------------------------------------- + // Do not uncomment, see above Note + // + //if (peek1 == '>') { + // if (peek2 == '=') { store(3, lexeme::RightShiftEq); } + // else { store(2, lexeme::RightShift); } + //} + //else + //--------------------------------------------------------- + if (peek1 == '=') { store(2, lexeme::GreaterEq); } + else { store(1, lexeme::Greater); } + + //G '++' '+=' '+' + break;case '+': + if (peek1 == '+') { store(2, lexeme::PlusPlus); } + else if (peek1 == '=') { store(2, lexeme::PlusEq); } + else { store(1, lexeme::Plus); } + + //G '--' '-=' '->' '-' + break;case '-': + if (peek1 == '-') { store(2, lexeme::MinusMinus); } + else if (peek1 == '=') { store(2, lexeme::MinusEq); } + else if (peek1 == '>') { store(2, lexeme::Arrow); } + else { store(1, 
lexeme::Minus); } + + //G '||=' '||' '|=' '|' + break;case '|': + if (peek1 == '|') { + if (peek2 == '=') { store(3, lexeme::LogicalOrEq); } + else { store(2, lexeme::LogicalOr); } + } + else if (peek1 == '=') { store(2, lexeme::PipeEq); } + else { store(1, lexeme::Pipe); } + + //G '&&=' '&&' '&=' '&' + break;case '&': + if (peek1 == '&') { + if (peek2 == '=') { store(3, lexeme::LogicalAndEq); } + else { store(2, lexeme::LogicalAnd); } + } + else if (peek1 == '=') { store(2, lexeme::AmpersandEq); } + else { store(1, lexeme::Ampersand); } + + // Next, all the other operators that have a compound assignment form + + //G '*=' '*' + break;case '*': + if (peek1 == '=') { store(2, lexeme::MultiplyEq); } + else { store(1, lexeme::Multiply); } + + //G '%=' '%' + break;case '%': + if (peek1 == '=') { store(2, lexeme::ModuloEq); } + else { store(1, lexeme::Modulo); } + + //G '^=' '^' + break;case '^': + if (peek1 == '=') { store(2, lexeme::CaretEq); } + else { store(1, lexeme::Caret); } + + //G '~=' '~' + break;case '~': + if (peek1 == '=') { store(2, lexeme::TildeEq); } + else { store(1, lexeme::Tilde); } + + //G '==' '=' + break;case '=': + if (peek1 == '=') { store(2, lexeme::EqualComparison); } + else { store(1, lexeme::Assignment); } + + //G '!=' '!' + break;case '!': + if (peek1 == '=') { store(2, lexeme::NotEqualComparison); } + else { store(1, lexeme::Not); } + + //G + //G punctuator: one of + //G '...' '.' + break;case '.': + if (peek1 == '.' && peek2 == '.') { store(3, lexeme::Ellipsis); } + else { store(1, lexeme::Dot); } + + //G '::' ':' + break;case ':': + if (peek1 == ':') { store(2, lexeme::Scope); } + else { store(1, lexeme::Colon); } + + // All the other single-character tokens + + //G '{' '}' '(' ')' '[' ']' ';' ',' '?' 
'$' + //G + + break;case '{': + store(1, lexeme::LeftBrace); + + break;case '}': + store(1, lexeme::RightBrace); + + break;case '(': + store(1, lexeme::LeftParen); + + break;case ')': + store(1, lexeme::RightParen); + + break;case '[': + store(1, lexeme::LeftBracket); + + break;case ']': + store(1, lexeme::RightBracket); + + break;case ';': + store(1, lexeme::Semicolon); + + break;case ',': + store(1, lexeme::Comma); + + break; case '?': + store(1, lexeme::QuestionMark); + + break; case '@': + store(1, lexeme::At); + + break;case '$': + if (peek1 == 'R' && peek2 == '"') { + // if peek(j-2) is 'R' it means that we deal with raw-string literal + auto R_pos = i + 1; + auto seq_pos = i + 3; + + if (auto paren_pos = line.find("(", seq_pos); paren_pos != std::string::npos) { + auto opening_seq = line.substr(i, paren_pos - i + 1); + auto closing_seq = ")"+line.substr(seq_pos, paren_pos-seq_pos)+"\""; + + if (auto closing_pos = line.find(closing_seq, paren_pos+1); closing_pos != line.npos) { + if (interpolate_raw_string( + opening_seq, + closing_seq, + string_parts::on_both_ends, + std::string_view(&line[paren_pos+1], closing_pos-paren_pos-1), i, closing_pos-i+std::ssize(closing_seq)) + ) { + continue; + } + + tokens.push_back({ + &line[R_pos], + i - R_pos + 1, + source_position(lineno, R_pos + 1), + lexeme::StringLiteral + }); + } else { + raw_string_multiline.emplace(raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq, true }); + + if (interpolate_raw_string( + opening_seq, + closing_seq, + string_parts::on_the_beginning, + std::string_view(&line[paren_pos+1], std::ssize(line)-(paren_pos+1)), i, std::ssize(line)-i) + ) { + continue; + } + // skip entire raw string opening sequence R" + i = paren_pos; + + // if we are on the end of the line we need to add new line char + if (i+1 == std::ssize(line)) { + raw_string_multiline.value().text += '\n'; + } + } + continue; + } + else { + errors.emplace_back( + source_position(lineno, i + 1), + "invalid 
new-line in raw string delimiter \"" + std::string(&line[i],3) + + "\" - stray 'R' in program \"" + ); + } + } else { + store(1, lexeme::Dollar); + } + + //G + //G literal: + //G integer-literal + //G character-literal + //G floating-point-literal + //G string-literal + //GTODO boolean-literal + //GTODO pointer-literal + //G + //G integer-literal: + //G binary-literal + //G hexadecimal-literal + //G decimal-literal + //G + //G binary-literal: + //G '0b' binary-digit + //G '0B' binary-digit + //G binary-literal binary-digit + //G binary-literal ''' binary-digit + //G + //G hexadecimal-literal: + //G '0x' hexadecimal-digit + //G '0X' hexadecimal-digit + //G hexadecimal-literal hexadecimal-digit + //G hexadecimal-literal ''' hexadecimal-digit + //G + break;case '0': { + auto j = 3; + if (peek1 == 'b' || peek1 == 'B') { + if (is_binary_digit(peek2)) { + while (is_separator_or(is_binary_digit,peek(j))) { ++j; } + store(j, lexeme::BinaryLiteral); + continue; + } + else { + errors.emplace_back( + source_position(lineno, i), + "binary literal cannot be empty (0B must be followed by binary digits)" + ); + ++i; + } + } + else if (peek1 == 'x' || peek1 == 'X') { + if (is_hexadecimal_digit(peek2)) { + while (is_separator_or(is_hexadecimal_digit,peek(j))) { ++j; } + store(j, lexeme::HexadecimalLiteral); + continue; + } + else { + errors.emplace_back( + source_position(lineno, i), + "hexadecimal literal cannot be empty (0X must be followed by hexadecimal digits)" + ); + ++i; + } + } + } + [[fallthrough]]; + + // NO BREAK: we want 0 to fall through to numeric literal case + // (this will be less kludgy to write with pattern matching) + default: + + if ( + line[i] == 'n' + && peek1 == 'o' + && peek2 == 't' + && isspace(peek3) + ) + { + store(3, lexeme::Not); + } + + //G + //G decimal-literal: + //G digit [uU][lL][lL] + //G decimal-literal digit [uU][lL][lL] + //G decimal-literal ''' digit [uU][lL][lL] + //G + //G floating-point-literal: + //G digit { ' | digit }* . 
digit ({ ' | digit }*)? ([eE][-+]?digit { ' | digit }*) [fFlL] + //G + //G TODO full grammar & refactor to utility functions with their + //G own unit test rather than inline everything here + //G + else if (is_digit(line[i])) { + auto j = 1; + while (is_separator_or(is_digit,peek(j))) { ++j; } + if ( + (peek(j) != '.' || !is_digit(peek(j+1))) + && peek(j) != 'f' + && peek(j) != 'F' + && peek(j) != 'e' + && peek(j) != 'E' + ) + { + // cf: https://en.cppreference.com/w/cpp/language/integer_literal + // + // TODO: This dumbly slurps the suffixs + // ull/ULL. Suffix parsing should move to a utility + // and be error checked. Best would be to slurp all + // [a-zA-Z] and then validate against a list of + // allowed suffixes. Ideally handle the C++23 size + // suffixes as well. + if (peek(j) == 'u' || peek(j) == 'U') { ++j; } + if (peek(j) == 'l' || peek(j) == 'L') { ++j; } + if (peek(j) == 'l' || peek(j) == 'L') { ++j; } + store(j, lexeme::DecimalLiteral); + } + else { + // cf: https://en.cppreference.com/w/cpp/language/floating_literal + + // slurps the digits after '.' + if (peek(j) == '.') { + ++j; + if (!is_digit(peek(j))) { + errors.emplace_back( + source_position(lineno, i), + "a floating point literal must have at least one digit after the decimal point (can be '.0')" + ); + } + while (is_separator_or(is_digit,peek(j))) { + ++j; + } + } + + // slurp the exponential form marker + if (peek(j) == 'e' || peek(j) == 'E') { + ++j; + if (peek(j) == '-' || peek(j) == '+') { ++j; } + while (is_separator_or(is_digit,peek(j))) { ++j; } + } + + // TODO: This dumbly slurps the suffixes fF or + // lL. Suffix parsing should move to a utility and be + // error checked. Best would be to slurp all [a-zA-Z] + // and then validate against a list of allowed + // suffixes. Ideally handle the C++23 suffixes aswell. 
+ if (peek(j) == 'f' || peek(j) == 'F') { ++j; } + else if (peek(j) == 'l' || peek(j) == 'L') { ++j; } + store(j, lexeme::FloatLiteral); + } + } + + //G string-literal: + //G encoding-prefix? '"' s-char-seq? '"' + //G encoding-prefix? 'R"' d-char-seq? '(' s-char-seq? ')' d-char-seq? '"' + //G + //G s-char-seq: + //G interpolation? s-char + //G interpolation? s-char-seq s-char + //G + //G d-char-seq: + //G d-char + //G + //G interpolation: + //G '(' expression ')' '$' + //G + else if (auto j = is_encoding_prefix_and('\"')) { + // if peek(j-2) is 'R' it means that we deal with raw-string literal + if (peek(j-2) == 'R') { + auto seq_pos = i + j; + + if (auto paren_pos = line.find("(", seq_pos); paren_pos != std::string::npos) { + auto opening_seq = line.substr(i, paren_pos - i + 1); + auto closing_seq = ")"+line.substr(seq_pos, paren_pos-seq_pos)+"\""; + + if (auto closing_pos = line.find(closing_seq, paren_pos+1); closing_pos != line.npos) { + store(closing_pos+std::ssize(closing_seq)-i, lexeme::StringLiteral); + } else { + raw_string_multiline.emplace(raw_string{source_position{lineno, i}, opening_seq, opening_seq, closing_seq }); + // skip entire raw string opening sequence R" + i = paren_pos; + + // if we are on the end of the line we need to add new line char + if (i+1 == std::ssize(line)) { + raw_string_multiline.value().text += '\n'; + } + } + continue; + } + else { + errors.emplace_back( + source_position(lineno, i + j - 2), + "invalid new-line in raw string delimiter \"" + std::string(&line[i],j) + + "\" - stray 'R' in program \"" + ); + } + } + else { + while (auto len = peek_is_sc_char(j, '\"')) { j += len; } + if (peek(j) != '\"') { + errors.emplace_back( + source_position(lineno, i), + "string literal \"" + std::string(&line[i+1],j) + + "\" is missing its closing \"" + ); + } + + // At this point we have a string-literal, but it may contain + // captures/interpolations we want to tokenize + auto literal = std::string_view{ &line[i], std::size_t(j+1) }; + 
auto s = expand_string_literal( literal, errors, source_position(lineno, i + 1) ); + + // If there are no captures/interpolations, just store it directly and continue + if (std::ssize(s) == j+1) { + store(j+1, lexeme::StringLiteral); + } + // Otherwise, replace it with the expanded version and continue + else { + if (std::ssize(s) <= j + 1) { + errors.emplace_back( + source_position( lineno, i ), + "not a legal string literal", + false, + true // a noisy fallback error message + ); + return {}; + } + mutable_line.replace( i, j+1, s ); + + reset_processing_of_the_line(); + } + } + } + + //G character-literal: + //G encoding-prefix? ''' c-char-seq? ''' + //G + //G c-char-seq: + //G c-char + //G c-char-seq c-char + //G + else if (auto j = is_encoding_prefix_and('\'')) { + auto len = peek_is_sc_char(j, '\''); + if (len > 0) { + j += len; + if (peek(j) != '\'') { + assert (j > 1); + errors.emplace_back( + source_position(lineno, i), + "character literal '" + std::string(&line[i+1],j-1) + + "' is missing its closing '" + ); + } + store(j+1, lexeme::CharacterLiteral); + } + else { + errors.emplace_back( + source_position(lineno, i), + "character literal is empty" + ); + } + } + + // Cpp1 multi-token fundamental type keyword + // + else if (auto j = peek_is_cpp1_multi_token_fundamental_keyword()) { + store(j, lexeme::Cpp1MultiKeyword); + } + + // Cpp2 fixed-width type alias keyword + // + else if (auto j = peek_is_cpp2_fundamental_type_keyword()) { + store(j, lexeme::Cpp2FixedType); + } + + // Other keyword + // + else if (auto j = peek_is_keyword()) { + store(j, lexeme::Keyword); + + if (tokens.back() == "const_cast") { + errors.emplace_back( + source_position(lineno, i), + "'const_cast' is not supported in Cpp2 - the current C++ best practice is to never cast away const, and that is const_cast's only effective use" + ); + } + if (tokens.back() == "static_cast") { + errors.emplace_back( + source_position(lineno, i), + "'static_cast<T>(val)' is not supported in Cpp2 - use 
'val as T' for safe conversions instead, or if necessary cpp2::unsafe_narrow<T>(val) for a possibly-lossy narrowing conversion" + ); + } + if (tokens.back() == "dynamic_cast") { + errors.emplace_back( + source_position(lineno, i), + "'dynamic_cast<Derived*>(pBase)' is not supported in Cpp2 - use 'pBase as *Derived' for safe dynamic conversions instead" + ); + } + } + + // Identifier + // + else if (auto j = starts_with_identifier({&line[i], std::size(line)-i})) + { + if ( + !isspace(peek(-1)) + && !tokens.empty() + && is_literal(tokens.back().type()) + ) + { + store(j, lexeme::UserDefinedLiteralSuffix); + } + else + { + store(j, lexeme::Identifier); + + tokens.back().remove_prefix_if("__identifier__"); + + if (tokens.back() == "NULL") { + errors.emplace_back( + source_position(lineno, i), + "'NULL' is not supported in Cpp2 - for a local pointer variable, leave it uninitialized instead, and set it to a non-null value when you have one" + ); + } + if (tokens.back() == "delete") { + errors.emplace_back( + source_position(lineno, i), + "'delete' and owning raw pointers are not supported in Cpp2 - use unique.new<T> or shared.new<T> instead in that order (or, in the future, gc.new<T>, but that is not yet implemented)" + ); + } + } + } + + // Anything else should be whitespace + // + else if (!isspace(line[i])) { + errors.emplace_back( + source_position(lineno, i), + std::string("unexpected text '") + line[i] + "'", + false, + true // a noisy fallback error message + ); + } + } + } + } + +END: + if (in_comment) { + current_comment += "\n"; + } + if (raw_string_multiline && line.size() == 0) { + raw_string_multiline.value().text += '\n'; + } + + assert (std::ssize(tokens) >= original_size); + return std::ssize(tokens) != original_size; +} + + +//----------------------------------------------------------------------- +// +// tokens: a map of the tokens of a source file +// +//----------------------------------------------------------------------- +// + +class tokens +{ + 
std::vector<error_entry>& errors; + + // All non-comment source tokens go here, which will be parsed in the parser + std::map<lineno_t, std::vector<token>> grammar_map; + + // All comment source tokens go here, which are applied in the lexer + // + // We could put all the tokens in the same map, but that would mean the + // parsing logic would have to remember to skip comments everywhere... + // simpler to keep comments separate, at the smaller cost of traversing + // a second token stream when lowering to Cpp1 to re-interleave comments + std::vector<comment> comments; + + // A stable place to store additional tokens that are synthesized later + std::deque<token> generated_tokens; + +public: + //----------------------------------------------------------------------- + // Constructor + // + // errors error list + // + tokens( + std::vector<error_entry>& errors_ + ) + : errors{ errors_ } + { + } + + + //----------------------------------------------------------------------- + // lex: Tokenize the Cpp2 lines + // + // lines tagged source lines + // is_generated is this generated code + // + auto lex( + std::vector<source_line>& lines, + bool is_generated = false + ) + -> void + { + auto in_comment = false; + auto raw_string_multiline = std::optional<raw_string>(); + + assert (std::ssize(lines) > 0); + auto line = std::begin(lines); + while (line != std::end(lines)) { + + // Skip over non-Cpp2 lines + if (line->cat != source_line::category::cpp2) { + ++line; + continue; + } + + // At this point, we're at the first line of a Cpp2 code section + + // Create new map entry for the section starting at this line, + // and populate its tokens with the tokens in this section + auto lineno = std::distance(std::begin(lines), line); + + // If this is generated code, use negative line numbers to + // inform and assist the printer + if (is_generated) { + lineno -= 10'000; + } + + auto& entry = grammar_map[lineno]; + auto current_comment = std::string{}; + auto current_comment_start 
= source_position{}; + + for ( + ; + line != std::end(lines) && line->cat == source_line::category::cpp2; + ++line, ++lineno + ) + { + lex_line( + line->text, lineno, + in_comment, current_comment, current_comment_start, + entry, comments, errors, + raw_string_multiline + ); + + // Check whether all the tokens on this line were consecutive + // w/o extra whitespace (separated by 0 or 1 whitespace chars) + if (!entry.empty()) { + for (auto i = std::ssize(entry) - 1; + i > 0; + --i + ) + { + if (entry[i-1].position().lineno != lineno) { + break; + } + + if ( + entry[i].position().lineno == lineno + && entry[i-1].position().colno + entry[i-1].length() + 1 + < entry[i].position().colno + ) + { + line->all_tokens_are_densely_spaced = false; + break; + } + } + } + } + } + } + + + //----------------------------------------------------------------------- + // get_map: Access the token map + // + auto get_map() const + -> auto const& + { + return grammar_map; + } + + + //----------------------------------------------------------------------- + // get_comments: Access the comment list + // + auto get_comments() const + -> auto const& + { + return comments; + } + + + //----------------------------------------------------------------------- + // get_generated: Access the generated tokens + // + auto get_generated() + -> auto& + { + return generated_tokens; + } + + + //----------------------------------------------------------------------- + // num_unprinted_comments: The number of not-yet-printed comments + // + auto num_unprinted_comments() + -> int + { + auto ret = 0; + for (auto const& c : comments) { + if (!c.dbg_was_printed) { + ++ret; + } + } + return ret; + } + + //----------------------------------------------------------------------- + // debug_print + // + auto debug_print(std::ostream& o) const + -> void + { + for (auto const& [lineno, entry] : grammar_map) { + + o << "--- Section starting at line " << lineno << "\n"; + for (auto const& token : entry) { + o << " " 
<< token << " (" << token.position().lineno + << "," << token.position().colno << ") " + << _as<std::string>(token.type()) << "\n"; + } + + } + + o << "--- Comments\n"; + for (auto const& [kind, start, end, text, dbg_ignore] : comments) { + o << " " + << (kind == comment::comment_kind::line_comment ? "// " : "/* ") + << "(" << start.lineno << "," << start.colno << ")" + << "-(" << end.lineno << "," << end.colno << ")" + << " " << text << "\n"; + } + + o << "--- Generated tokens\n"; + for (auto const& token : generated_tokens) { + o << " " << token << " (" << token.position().lineno + << "," << token.position().colno << ") " + << _as<std::string>(token.type()) << "\n"; + } + + o << "--- Generated text\n"; + for (auto const& s : generated_text) { + o << " " << s << "\n"; + } + + } + +}; + +static auto generated_lexers = std::deque<tokens>{}; + +} + +#endif diff --git a/CompilerDriver/cc2/source/parse.h b/CompilerDriver/cc2/source/parse.h new file mode 100644 index 0000000..e8e6daf --- /dev/null +++ b/CompilerDriver/cc2/source/parse.h @@ -0,0 +1,9263 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ + +//=========================================================================== +// Parser +//=========================================================================== + +#ifndef CPP2_PARSE_H +#define CPP2_PARSE_H + +#include "lex.h" +#include <memory> +#include <variant> +#include <iostream> + + +namespace cpp2 { + +auto violates_lifetime_safety = false; + +//----------------------------------------------------------------------- +// Operator categorization +// + +//G prefix-operator: +//G one of '!' '-' '+' +//GT parameter-direction +//G +auto is_prefix_operator(token const& tok) + -> bool +{ + //if (to_passing_style(tok) != passing_style::invalid) { + // return true; + //} + + switch (tok.type()) { + break;case lexeme::Not: + case lexeme::Minus: + case lexeme::Plus: + return true; + break;default: + return false; + } +} + + +//G postfix-operator: +//G one of '++' '--' '*' '&' '~' '$' '...' +//G +auto is_postfix_operator(lexeme l) + -> bool +{ + switch (l) { + break;case lexeme::PlusPlus: + case lexeme::MinusMinus: + case lexeme::Multiply: + case lexeme::Ampersand: + case lexeme::Tilde: + case lexeme::Dollar: + case lexeme::Ellipsis: + return true; + break;default: + return false; + } +} + + +//G assignment-operator: +//G one of '=' '*=' '/=' '%=' '+=' '-=' '>>=' '<<=' '&=' '^=' '|=' +//G +auto is_assignment_operator(lexeme l) + -> bool +{ + switch (l) { + break;case lexeme::Assignment: + case lexeme::MultiplyEq: + case lexeme::SlashEq: + case lexeme::ModuloEq: + case lexeme::PlusEq: + case lexeme::MinusEq: + case lexeme::RightShiftEq: + case lexeme::LeftShiftEq: + case lexeme::AmpersandEq: + case lexeme::CaretEq: + case lexeme::PipeEq: + return true; + break;default: + return false; + } +} + + +//----------------------------------------------------------------------- +// +// Parse tree node types +// +//----------------------------------------------------------------------- +// + +//----------------------------------------------------------------------- +// 
try_visit +// +// Helper to visit whatever is in a variant where each +// alternative is a smart pointer +// +template <int I> +auto try_visit(auto& variant, auto& visitor, int depth) + -> void +{ + if (variant.index() == I) { + auto const& s = std::get<I>(variant); + assert (s); + s->visit(visitor, depth+1); + } +} + + +struct expression_list_node; +struct id_expression_node; +struct declaration_node; +struct inspect_expression_node; +struct literal_node; +struct template_argument; + + +struct primary_expression_node +{ + enum active { empty=0, identifier, expression_list, id_expression, declaration, inspect, literal }; + std::variant< + std::monostate, + token const*, + std::unique_ptr<expression_list_node>, + std::unique_ptr<id_expression_node>, + std::unique_ptr<declaration_node>, + std::unique_ptr<inspect_expression_node>, + std::unique_ptr<literal_node> + > expr; + // Cache to work around <https://github.com/llvm/llvm-project/issues/73336>. + bool expression_list_is_fold_expression = false; + + + // API + // + auto is_fold_expression() const + -> bool; + + auto is_identifier() const + -> bool; + + auto is_id_expression() const + -> bool; + + auto is_expression_list() const + -> bool; + + auto get_expression_list() const + -> expression_list_node const*; + + auto is_literal() const + -> bool; + + auto template_arguments() const -> std::vector<template_argument> const&; + + auto get_token() const -> token const*; + + auto to_string() const + -> std::string; + + // Internals + // + auto position() const -> source_position; + auto visit(auto& v, int depth) -> void; +}; + + +struct literal_node { + token const* literal = {}; + token const* user_defined_suffix = {}; + + // API + // + auto get_token() const + -> token const* + { + return literal; + } + + auto to_string() const + -> std::string + { + assert (literal); + auto ret = literal->to_string(); + if (user_defined_suffix) { + ret += user_defined_suffix->to_string(); + } + return ret; + } + + // Internals + // 
+ auto position() const + -> source_position + { + assert (literal); + return literal->position(); + } + + auto visit(auto& v, int depth) -> void + { + v.start(*this, depth); + assert (literal); + literal->visit(v, depth+1); + if (user_defined_suffix) { + user_defined_suffix->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct postfix_expression_node; + +struct prefix_expression_node +{ + std::vector<token const*> ops; + std::unique_ptr<postfix_expression_node> expr; + + // API + // + auto is_fold_expression() const + -> bool; + + auto is_identifier() const + -> bool; + + auto is_id_expression() const + -> bool; + + auto is_expression_list() const + -> bool; + + auto get_expression_list() const + -> expression_list_node const*; + + auto get_postfix_expression_node() const + -> postfix_expression_node * + { + assert(expr); + return expr.get(); + } + + auto is_literal() const + -> bool; + + auto is_result_a_temporary_variable() const -> bool; + + auto to_string() const + -> std::string; + + // Internals + // + auto position() const -> source_position; + auto visit(auto& v, int depth) -> void; +}; + + +struct expression_node; + + +template< + String Name, + typename Term +> +struct binary_expression_node +{ + std::unique_ptr<Term> expr; + expression_node const* my_expression = {}; + + binary_expression_node(); + + struct term + { + token const* op; + std::unique_ptr<Term> expr; + }; + std::vector<term> terms; + + + // API + // + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression + // has an identifier named "..." 
+ auto ret = expr->is_fold_expression(); + for (auto& x : terms) { + ret |= x.expr->is_fold_expression(); + } + return ret; + } + + auto lhs_is_id_expression() const + -> bool + { + return expr->is_id_expression(); + } + + auto is_standalone_expression() const + -> bool; + + auto terms_size() const + -> int + { + return std::ssize(terms); + } + + auto is_identifier() const + -> bool + { + return terms.empty() && expr->is_identifier(); + } + + auto is_id_expression() const + -> bool + { + return terms.empty() && expr->is_id_expression(); + } + + auto is_expression_list() const + -> bool + { + return terms.empty() && expr->is_expression_list(); + } + + auto get_expression_list() const + -> expression_list_node const* + { + if (is_expression_list()) { + return expr->get_expression_list(); + } + return {}; + } + + auto is_literal() const + -> bool + { + return terms.empty() && expr->is_literal(); + } + + // Get left-hand postfix-expression + auto get_postfix_expression_node() const + -> postfix_expression_node * + { + assert(expr); + return expr->get_postfix_expression_node(); + } + + // Get first right-hand postfix-expression, if there is one + auto get_second_postfix_expression_node() const + -> postfix_expression_node * + { + if (!terms.empty()) { + assert(terms.front().expr); + return terms.front().expr->get_postfix_expression_node(); + } + // else + return {}; + } + + // "Simple" means binary (size>0) and not chained (size<2) + struct get_lhs_rhs_if_simple_binary_expression_with_ret { + postfix_expression_node* lhs; + Term* rhs; + }; + auto get_lhs_rhs_if_simple_binary_expression_with(lexeme op) const + -> get_lhs_rhs_if_simple_binary_expression_with_ret + { + if ( + std::ssize(terms) == 1 + && terms[0].op->type() == op + ) + { + return { + get_postfix_expression_node(), + terms.front().expr.get() + }; + } + // Else + return { nullptr, nullptr }; + } + + auto is_result_a_temporary_variable() const -> bool { + if constexpr (std::string_view(Name.value) == 
"assignment") { + assert(expr); + return expr->is_result_a_temporary_variable(); + } else { + if (terms.empty()) { + assert(expr); + return expr->is_result_a_temporary_variable(); + } else { + return true; + } + } + } + + auto to_string() const + -> std::string + { + assert (expr); + auto ret = expr->to_string(); + for (auto const& x : terms) { + assert (x.op); + ret += " " + x.op->to_string(); + assert (x.expr); + ret += " " + x.expr->to_string(); + } + return ret; + } + + + // Internals + // + auto position() const + -> source_position + { + assert (expr); + return expr->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (expr); + expr->visit(v, depth+1); + for (auto const& x : terms) { + assert (x.op); + v.start(*x.op, depth+1); + assert (x.expr); + x.expr->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct is_as_expression_node; + +using multiplicative_expression_node = binary_expression_node< "multiplicative" , is_as_expression_node >; +using additive_expression_node = binary_expression_node< "additive" , multiplicative_expression_node >; +using shift_expression_node = binary_expression_node< "shift" , additive_expression_node >; +using compare_expression_node = binary_expression_node< "compare" , shift_expression_node >; +using relational_expression_node = binary_expression_node< "relational" , compare_expression_node >; +using equality_expression_node = binary_expression_node< "equality" , relational_expression_node >; +using bit_and_expression_node = binary_expression_node< "bit-and" , equality_expression_node >; +using bit_xor_expression_node = binary_expression_node< "bit-xor" , bit_and_expression_node >; +using bit_or_expression_node = binary_expression_node< "bit-or" , bit_xor_expression_node >; +using logical_and_expression_node = binary_expression_node< "logical-and" , bit_or_expression_node >; +using logical_or_expression_node = binary_expression_node< "logical-or" , 
logical_and_expression_node >; +using assignment_expression_node = binary_expression_node< "assignment" , logical_or_expression_node >; + + +struct assignment_expression_lhs_rhs { + postfix_expression_node* lhs; + logical_or_expression_node* rhs; +}; + + +struct expression_statement_node; + +struct expression_node +{ + static inline std::vector<expression_node*> current_expressions = {}; + + std::unique_ptr<assignment_expression_node> expr; + int num_subexpressions = 0; + expression_statement_node const* my_statement = {}; + + expression_node(); + + // API + // + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression + // has an identifier named "..." + return expr->is_fold_expression(); + } + + auto is_standalone_expression() const + -> bool; + + auto subexpression_count() const + -> int + { + return num_subexpressions; + } + + auto is_identifier() const + -> bool + { + return expr->is_identifier(); + } + + auto is_id_expression() const + -> bool + { + return expr->is_id_expression(); + } + + auto is_expression_list() const + -> bool + { + return expr->is_expression_list(); + } + + auto get_expression_list() const + -> expression_list_node const* + { + if (is_expression_list()) { + return expr->get_expression_list(); + } + return {}; + } + + auto is_literal() const + -> bool + { + return expr->is_literal(); + } + + auto get_lhs_rhs_if_simple_assignment() const + -> assignment_expression_lhs_rhs; + + auto to_string() const + -> std::string + { + assert (expr); + return expr->to_string(); + } + + // Internals + // + auto position() const -> source_position + { + assert (expr); + return expr->position(); + } + + auto visit(auto& v, int depth) -> void + { + v.start(*this, depth); + assert (expr); + expr->visit(v, depth+1); + v.end(*this, depth); + } +}; + + +template< + String Name, + typename Term +> +binary_expression_node<Name, Term>::binary_expression_node() { + if (!expression_node::current_expressions.empty()) { + 
my_expression = expression_node::current_expressions.back(); + } +} + + +template< + String Name, + typename Term +> +auto binary_expression_node<Name, Term>::is_standalone_expression() const + -> bool +{ + return + my_expression + && my_expression->is_standalone_expression() + ; +} + + +enum class passing_style { in=0, copy, inout, out, move, forward, invalid }; +auto to_passing_style(token const& t) -> passing_style { + if (t.type() == lexeme::Identifier) { + if (t == "in" ) { return passing_style::in; } + if (t == "copy" ) { return passing_style::copy; } + if (t == "inout" ) { return passing_style::inout; } + if (t == "out" ) { return passing_style::out; } + if (t == "move" ) { return passing_style::move; } + if (t == "forward") { return passing_style::forward; } + } + return passing_style::invalid; +} +auto to_string_view(passing_style pass) -> std::string_view { + switch (pass) { + break;case passing_style::in : return "in"; + break;case passing_style::copy : return "copy"; + break;case passing_style::inout : return "inout"; + break;case passing_style::out : return "out"; + break;case passing_style::move : return "move"; + break;case passing_style::forward: return "forward"; + break;default : return "INVALID passing_style"; + } +} + + +struct expression_list_node +{ + token const* open_paren = {}; + token const* close_paren = {}; + bool inside_initializer = false; + + struct term { + passing_style pass = {}; + std::unique_ptr<expression_node> expr; + + auto visit(auto& v, int depth) -> void + { + v.start(*this, depth); + assert(expr); + expr->visit(v, depth+1); + v.end(*this, depth); + } + }; + std::vector< term > expressions; + + + // API + // + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression + // has an identifier named "..." 
+ auto ret = false; + for (auto& x : expressions) { + ret |= x.expr->is_fold_expression(); + } + return ret; + } + + + // Internals + // + auto position() const + -> source_position + { + // Make sure this got set + assert (open_paren); + return open_paren->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + for (auto& x : expressions) { + x.visit(v, depth+1); + } + v.end(*this, depth); + } +}; + +auto primary_expression_node::is_identifier() const + -> bool +{ + return expr.index() == identifier; +} + +auto primary_expression_node::is_id_expression() const + -> bool +{ + return expr.index() == id_expression; +} + +auto primary_expression_node::is_expression_list() const + -> bool +{ + return expr.index() == expression_list; +} + +auto primary_expression_node::get_expression_list() const + -> expression_list_node const* +{ + if (is_expression_list()) { + return std::get<expression_list>(expr).get(); + } + return {}; +} + +auto primary_expression_node::is_literal() const + -> bool +{ + return expr.index() == literal; +} + + +struct expression_statement_node +{ + static inline std::vector<expression_statement_node*> current_expression_statements = {}; + + std::unique_ptr<expression_node> expr; + bool has_semicolon = false; + + // API + // + auto subexpression_count() const + -> int + { + assert (expr); + return expr->subexpression_count(); + } + + auto to_string() const + -> std::string + { + assert (expr); + return expr->to_string(); + } + + // Internals + // + auto position() const + -> source_position + { + assert (expr); + return expr->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (expr); + expr->visit(v, depth+1); + v.end(*this, depth); + } +}; + + +auto expression_node::is_standalone_expression() const + -> bool +{ + return + my_statement + && my_statement->subexpression_count() == subexpression_count() + ; +} + + +struct capture { + postfix_expression_node* 
capture_expr; + std::string cap_sym = {}; + std::string str = {}; + std::string str_suppressed_move = {}; + auto operator==(postfix_expression_node* p) { return capture_expr == p; } +}; + +struct capture_group { + std::vector<capture> members; + + auto add(postfix_expression_node* p) + -> void + { + members.push_back({p}); + } + + auto remove(postfix_expression_node* p) + -> void; + + ~capture_group(); +}; + + +struct postfix_expression_node +{ + std::unique_ptr<primary_expression_node> expr; + + struct term + { + token const* op; + + // This is used if *op is . - can be null + std::unique_ptr<id_expression_node> id_expr = {}; + + // These are used if *op is [ or ( - can be null + std::unique_ptr<expression_list_node> expr_list = {}; + token const* op_close = {}; + }; + std::vector<term> ops; + capture_group* cap_grp = {}; + + ~postfix_expression_node(); + + // API + // + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression + // has an identifier named "..." 
+ return expr->is_fold_expression(); + } + + auto is_identifier() const + -> bool + { + return ops.empty() && expr->is_identifier(); + } + + auto is_id_expression() const + -> bool + { + return ops.empty() && expr->is_id_expression(); + } + + auto is_expression_list() const + -> bool + { + return ops.empty() && expr->is_expression_list(); + } + + auto get_expression_list() const + -> expression_list_node const* + { + if (is_expression_list()) { + return expr->get_expression_list(); + } + return {}; + } + + auto is_literal() const + -> bool + { + return ops.empty() && expr->is_literal(); + } + + auto get_first_token_ignoring_this() const + -> token const*; + + auto is_result_a_temporary_variable() const -> bool { + if (ops.empty()) { + return false; + } else { + return (ops.front().op->type() == lexeme::Ampersand + || ops.front().op->type() == lexeme::Tilde); + } + } + + auto to_string() const + -> std::string; + + // Internals + // + auto position() const -> source_position + { + assert (expr); + return expr->position(); + } + + auto visit(auto& v, int depth) -> void; +}; + +auto prefix_expression_node::is_fold_expression() const + -> bool +{ + // This is a fold-expression if any subexpression + // has an identifier named "..." 
+ return expr->is_fold_expression(); +} + +auto prefix_expression_node::is_identifier() const + -> bool +{ + return ops.empty() && expr->is_identifier(); +} + +auto prefix_expression_node::is_id_expression() const + -> bool +{ + return ops.empty() && expr->is_id_expression(); +} + +auto prefix_expression_node::is_expression_list() const + -> bool +{ + return ops.empty() && expr->is_expression_list(); +} + +auto prefix_expression_node::get_expression_list() const + -> expression_list_node const* +{ + if (is_expression_list()) { + return expr->get_expression_list(); + } + return {}; +} + +auto prefix_expression_node::is_literal() const + -> bool +{ + return ops.empty() && expr->is_literal(); +} + +auto prefix_expression_node::is_result_a_temporary_variable() const -> bool { + if (ops.empty()) { + return expr->is_result_a_temporary_variable(); + } else { + return true; + } +} + + +auto expression_node::get_lhs_rhs_if_simple_assignment() const + -> assignment_expression_lhs_rhs +{ + auto ret = expr->get_lhs_rhs_if_simple_binary_expression_with(lexeme::Assignment); + return { ret.lhs, ret.rhs }; +} + + +auto capture_group::remove(postfix_expression_node* p) + -> void +{ + p->cap_grp = {}; + auto old_size = members.size(); + std::erase(members, p); + assert (members.size() == old_size-1); +} + + +capture_group::~capture_group() +{ + assert (members.empty()); + // We shouldn't need to do this: + // while (!members.empty()) { + // remove(members.front().capture_expr); + // } + // if the capture_group outlives the tree of things that can point to it + // => each node with a capture_group should declare it as the first member + // before any other node that could own a postfix_expression that could + // point back up to that capture_group +} + + +auto prefix_expression_node::to_string() const + -> std::string +{ + auto ret = std::string{}; + + for (auto const& x : ops) { + assert (x); + ret += x->as_string_view(); + } + + assert (expr); + return ret + expr->to_string(); +} + 
+ +auto prefix_expression_node::position() const + -> source_position +{ + if (std::ssize(ops) > 0) { + return ops.front()->position(); + } + assert (expr); + return expr->position(); +} + + +auto prefix_expression_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + for (auto const& x : ops) { + assert (x); + v.start(*x, depth+1); + } + assert (expr); + expr->visit(v, depth+1); + v.end(*this, depth); +} + + +struct type_id_node; +struct template_args_tag { }; + +struct template_argument +{ + enum active { empty=0, expression, type_id }; + source_position comma; + std::variant< + std::monostate, + std::unique_ptr<expression_node>, + std::unique_ptr<type_id_node> + > arg; + + auto to_string() const + -> std::string; +}; + +// Used by functions that must return a reference to an empty arg list +inline std::vector<template_argument> const no_template_args; + +struct unqualified_id_node +{ + token const* identifier = {}; // required + + // These are used only if it's a template-id + source_position open_angle = {}; + source_position close_angle = {}; + + std::vector<template_argument> template_args; + + auto template_arguments() const + -> std::vector<template_argument> const& + { + return template_args; + } + + auto get_token() const + -> token const* + { + if (open_angle == source_position{}) { + assert (identifier); + return identifier; + } + // else + return {}; + } + + auto to_string() const + -> std::string; + + auto position() const + -> source_position + { + assert (identifier); + return identifier->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (identifier); + v.start(*identifier, depth+1); + + if (open_angle != source_position{}) { + // Inform the visitor that this is a template args list + v.start(template_args_tag{}, depth); + assert(open_angle != source_position{}); + assert(close_angle != source_position{}); + assert(template_args.empty() + || template_args.front().comma == 
source_position{}); + for (auto& a : template_args) { + try_visit<template_argument::expression>(a.arg, v, depth+1); + try_visit<template_argument::type_id >(a.arg, v, depth+1); + } + v.end(template_args_tag{}, depth); + } + + v.end(*this, depth); + } +}; + + +struct qualified_id_node +{ + struct term { + token const* scope_op; + std::unique_ptr<unqualified_id_node> id = {}; + + term( token const* o ) : scope_op{o} { } + }; + std::vector<term> ids; + + auto template_arguments() const + -> std::vector<template_argument> const& + { + return ids.back().id->template_arguments(); + } + + auto get_token() const + -> token const* + { + if ( + std::ssize(ids) == 1 + && !ids.front().scope_op + ) + { + assert (ids.front().id); + return ids.front().id->get_token(); + } + // else + return {}; + } + + auto to_string() const + -> std::string + { + auto ret = std::string{}; + for (auto& term : ids) { + if (term.scope_op) { + ret += term.scope_op->as_string_view(); + } + assert (term.id); + ret += term.id->to_string(); + } + return ret; + } + + auto get_first_token() const + -> token const* + { + assert ( + !ids.empty() + && ids.front().id + ); + return ids.front().id->get_token(); + } + + auto position() const + -> source_position + { + assert (!ids.empty()); + if (ids.front().scope_op) { + return ids.front().scope_op->position(); + } + else { + assert (ids.front().id); + return ids.front().id->position(); + } + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + for (auto const& x : ids) { + if (x.scope_op) { + x.scope_op->visit(v, depth+1); + } + assert(x.id); + x.id->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct type_id_node +{ + source_position pos; + + std::vector<token const*> pc_qualifiers; + token const* address_of = {}; + token const* dereference_of = {}; + int dereference_cnt = {}; + token const* suspicious_initialization = {}; + + enum active { empty=0, qualified, unqualified, keyword }; + std::variant< + std::monostate, + 
std::unique_ptr<qualified_id_node>, + std::unique_ptr<unqualified_id_node>, + token const* + > id; + + auto is_wildcard() const + -> bool + { + return + id.index() == type_id_node::empty + || (get_token() && *get_token() == "_") + ; + } + + auto is_pointer_qualified() const + -> bool + { + for (auto q : pc_qualifiers) { + if (q->type() == lexeme::Multiply) { + return true; + } + } + return false; + } + + auto is_concept() const + -> bool + { + auto tok = get_token(); + return tok && *tok == "concept"; + } + + auto template_arguments() const + -> std::vector<template_argument> const& + { + if (id.index() == unqualified) { + return std::get<unqualified>(id)->template_arguments(); + } + // else + return std::get<qualified>(id)->template_arguments(); + } + + auto to_string() const + -> std::string + { + switch (id.index()) { + break;case empty: + return {}; + break;case qualified: + return std::get<qualified>(id)->to_string(); + break;case unqualified: + return std::get<unqualified>(id)->to_string(); + break;case keyword: + return std::get<keyword>(id)->to_string(); + break;default: + assert(!"ICE: invalid type_id state"); + } + // else + return {}; + } + + auto get_token() const + -> token const* + { + switch (id.index()) { + break;case empty: + return {}; + break;case qualified: + return {}; + break;case unqualified: + return get<unqualified>(id)->get_token(); + break;case keyword: + return get<keyword>(id); + break;default: + assert(!"ICE: invalid type_id state"); + } + // else + return {}; + } + + auto position() const + -> source_position + { + return pos; + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + for (auto q : pc_qualifiers) { + v.start(*q, depth+1); + } + try_visit<qualified >(id, v, depth); + try_visit<unqualified>(id, v, depth); + try_visit<keyword >(id, v, depth); + v.end(*this, depth); + } +}; + +auto unqualified_id_node::to_string() const + -> std::string +{ + assert(identifier); + auto ret = identifier->to_string(); + 
if (open_angle != source_position{}) { + auto separator = std::string{"<"}; + for (auto& t : template_args) { + ret += separator; + assert(t.arg.index() != template_argument::empty); + if (t.arg.index() == template_argument::expression) { + ret += std::get<template_argument::expression>(t.arg)->to_string(); + } + else if (t.arg.index() == template_argument::type_id) { + ret += std::get<template_argument::type_id>(t.arg)->to_string(); + } + separator = ","; + } + if (std::ssize(template_args) > 0) { + ret += ">"; + } + } + return ret; +} + +auto template_argument::to_string() const + -> std::string +{ + switch (arg.index()) { + break;case empty: + return {}; + break;case expression: + return std::get<expression>(arg)->to_string(); + break;case type_id: + return std::get<type_id>(arg)->to_string(); + break;default: + assert(!"ICE: invalid template_argument state"); + } + // else + return {}; +} + + +struct is_as_expression_node +{ + std::unique_ptr<prefix_expression_node> expr; + + struct term + { + token const* op = {}; + + // This is used if *op is a type - can be null + std::unique_ptr<type_id_node> type = {}; + + // This is used if *op is an expression - can be null + std::unique_ptr<expression_node> expr = {}; + }; + std::vector<term> ops; + + + // API + // + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression + // has an identifier named "..." 
+ return expr->is_fold_expression(); + } + + auto is_identifier() const + -> bool + { + return ops.empty() && expr->is_identifier(); + } + + auto is_id_expression() const + -> bool + { + return ops.empty() && expr->is_id_expression(); + } + + auto is_expression_list() const + -> bool + { + return ops.empty() && expr->is_expression_list(); + } + + auto get_expression_list() const + -> expression_list_node const* + { + if (is_expression_list()) { + return expr->get_expression_list(); + } + return {}; + } + + auto is_literal() const + -> bool + { + return ops.empty() && expr->is_literal(); + } + + auto get_postfix_expression_node() const + -> postfix_expression_node * + { + assert(expr); + return expr->get_postfix_expression_node(); + } + + auto is_result_a_temporary_variable() const -> bool { + if (ops.empty()) { + assert(expr); + return expr->is_result_a_temporary_variable(); + } else { + return true; + } + } + + auto to_string() const + -> std::string + { + assert (expr); + auto ret = expr->to_string(); + for (auto const& x : ops) { + assert (x.op); + ret += " " + x.op->to_string(); + if (x.type) { + ret += " " + x.type->to_string(); + } + if (x.expr) { + ret += " " + x.expr->to_string(); + } + } + return ret; + } + + // Internals + // + auto position() const + -> source_position + { + assert (expr); + return expr->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (expr); + expr->visit(v, depth+1); + for (auto const& x : ops) { + assert (x.op); + v.start(*x.op, depth+1); + if (x.type) { + x.type->visit(v, depth+1); + } + if (x.expr) { + x.expr->visit(v, depth+1); + } + } + v.end(*this, depth); + } +}; + + +expression_node::expression_node() +{ + if (!expression_statement_node::current_expression_statements.empty()) { + my_statement = expression_statement_node::current_expression_statements.back(); + } +} + + +struct id_expression_node +{ + source_position pos; + + enum active { empty=0, qualified, unqualified }; + 
std::variant< + std::monostate, + std::unique_ptr<qualified_id_node>, + std::unique_ptr<unqualified_id_node> + > id; + + auto template_arguments() const + -> std::vector<template_argument> const& + { + if (is_unqualified()) { + return std::get<unqualified>(id)->template_arguments(); + } + // else + return std::get<qualified>(id)->template_arguments(); + } + + auto is_fold_expression() const + -> bool + { + // This is a fold-expression if any subexpression has + // has an identifier named "..." + auto tok = get_token(); + return tok && *tok == "..."; + } + + auto is_empty() const + -> bool + { + return id.index() == empty; + } + + auto is_qualified() const + -> bool + { + return id.index() == qualified; + } + + auto is_unqualified() const + -> bool + { + return id.index() == unqualified; + } + + auto get_token() const + -> token const* + { + if (id.index() == unqualified) { + return std::get<unqualified>(id)->get_token(); + } + // else + return {}; + } + + auto to_string() const + -> std::string + { + if (id.index() == qualified) { + return std::get<qualified>(id)->to_string(); + } + else if (id.index() == unqualified) { + return std::get<unqualified>(id)->to_string(); + } + // else + return {}; + } + + auto position() const + -> source_position + { + return pos; + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + try_visit<qualified >(id, v, depth); + try_visit<unqualified>(id, v, depth); + v.end(*this, depth); + } +}; + + +postfix_expression_node::~postfix_expression_node() +{ + if (cap_grp) { + cap_grp->remove(this); + } +} + + +auto primary_expression_node::is_fold_expression() const + -> bool +{ + // This is a fold-expression if any subexpression has + // has an identifier named "..." 
+ switch (expr.index()) { + break;case identifier: + return *std::get<identifier>(expr) == "..."; + break;case expression_list: + return expression_list_is_fold_expression; + break;case id_expression: + return std::get<id_expression>(expr)->is_fold_expression(); + break;default: ; // the others can't contain folds + } + return false; +} + + +auto postfix_expression_node::get_first_token_ignoring_this() const + -> token const* +{ + if ( + expr->get_token() + && *expr->get_token() == "this" + && std::ssize(ops) == 1 + && ops[0].op->type() == lexeme::Dot + ) + { + return ops[0].id_expr->get_token(); + } + return expr->get_token(); +} + + +auto postfix_expression_node::to_string() const + -> std::string +{ + assert (expr); + auto ret = expr->to_string(); + + for (auto const& x : ops) { + assert (x.op); + ret += x.op->as_string_view(); + if (x.id_expr) { + ret += x.id_expr->to_string(); + } + if (x.expr_list) { + return "(*ERROR*) temporary alpha limitation: type metafunctions cannot stringize expressions that involve nested expression-lists, declarations, or inspect expressions"; + } + } + + return ret; +} + + +auto postfix_expression_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + assert (expr); + expr->visit(v, depth+1); + for (auto const& x : ops) { + assert (x.op); + v.start(*x.op, depth+1); + if (x.id_expr) { + x.id_expr->visit(v, depth+1); + } + if (x.expr_list) { + x.expr_list->visit(v, depth+1); + } + } + v.end(*this, depth); +} + + +struct statement_node; + +struct compound_statement_node +{ + source_position open_brace; + source_position close_brace; + std::vector<std::unique_ptr<statement_node>> statements; + + colno_t body_indent = 0; + + compound_statement_node(source_position o = source_position{}); + + auto position() const + -> source_position + { + return open_brace; + } + + auto visit(auto& v, int depth) -> void; +}; + + +struct selection_statement_node +{ + bool is_constexpr = false; + token const* identifier = {}; + 
source_position else_pos; + std::unique_ptr<logical_or_expression_node> expression; + std::unique_ptr<compound_statement_node> true_branch; + std::unique_ptr<compound_statement_node> false_branch; + bool has_source_false_branch = false; + + auto position() const + -> source_position + { + assert (identifier); + return identifier->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (identifier); + v.start(*identifier, depth+1); + assert (expression); + expression->visit(v, depth+1); + assert (true_branch); + true_branch->visit(v, depth+1); + if (false_branch) { + false_branch->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct parameter_declaration_node; + +struct iteration_statement_node +{ + token const* label = {}; + token const* identifier = {}; + std::unique_ptr<assignment_expression_node> next_expression; // if used, else null + std::unique_ptr<logical_or_expression_node> condition; // used for "do" and "while", else null + std::unique_ptr<compound_statement_node> statements; // used for "do" and "while", else null + std::unique_ptr<expression_node> range; // used for "for", else null + std::unique_ptr<parameter_declaration_node> parameter; // used for "for", else null + std::unique_ptr<statement_node> body; // used for "for", else null + bool for_with_in = false;// used for "for," says whether loop variable is 'in' + + auto position() const + -> source_position + { + if (label) { + return label->position(); + } + assert(identifier); + return identifier->position(); + } + + auto visit(auto& v, int depth) + -> void; +}; + + +struct return_statement_node +{ + token const* identifier = {}; + std::unique_ptr<expression_node> expression; + + auto position() const + -> source_position + { + assert(identifier); + return identifier->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + if (expression) { + expression->visit(v, depth+1); + } + v.end(*this, depth); + } +}; 
+ + +struct alternative_node +{ + std::unique_ptr<unqualified_id_node> name; + token const* is_as_keyword = {}; + + // One of these will be used + std::unique_ptr<type_id_node> type_id; + std::unique_ptr<postfix_expression_node> value; + + source_position equal_sign; + std::unique_ptr<statement_node> statement; + + auto position() const + -> source_position + { + assert(is_as_keyword); + return is_as_keyword->position(); + } + + auto visit(auto& v, int depth) + -> void; +}; + + +struct inspect_expression_node +{ + bool is_constexpr = false; + token const* identifier = {}; + std::unique_ptr<expression_node> expression; + std::unique_ptr<type_id_node> result_type; + source_position open_brace; + source_position close_brace; + + std::vector<std::unique_ptr<alternative_node>> alternatives; + + auto position() const + -> source_position + { + assert(identifier); + return identifier->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert (identifier); + v.start(*identifier, depth+1); + assert (expression); + expression->visit(v, depth+1); + if (result_type) { + result_type->visit(v, depth+1); + } + for (auto&& alt : alternatives) { + alt->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct contract_node +{ + // Declared first, because it should outlive any owned + // postfix_expressions that could refer to it + capture_group captures; + + source_position open_bracket; + token const* kind = {}; + std::unique_ptr<id_expression_node> group; + std::vector<std::unique_ptr<id_expression_node>> flags; + std::unique_ptr<logical_or_expression_node> condition; + std::unique_ptr<expression_node> message = {}; + + contract_node( source_position pos ) + : open_bracket{pos} + { } + + auto position() const + -> source_position + { + return open_bracket; + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + + assert(kind); + kind->visit(v, depth+1); + + if (group) { + group->visit(v, depth+1); + } + + 
for (auto const& f : flags) { + f->visit(v, depth+1); + } + + assert(condition); + condition->visit(v, depth+1); + + if (message) { + message->visit(v, depth+1); + } + + v.end(*this, depth); + } +}; + + +struct jump_statement_node +{ + token const* keyword; + token const* label; + + auto position() const + -> source_position + { + assert(keyword); + return keyword->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + if (keyword) { + keyword->visit(v, depth+1); + } + if (label) { + label->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +struct using_statement_node +{ + token const* keyword = {}; + bool for_namespace = false; + std::unique_ptr<id_expression_node> id; + + auto position() const + -> source_position + { + assert(keyword); + return keyword->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert(id); + id->visit(v, depth+1); + v.end(*this, depth); + } +}; + + +struct parameter_declaration_list_node; + +struct statement_node +{ + std::unique_ptr<parameter_declaration_list_node> parameters; + compound_statement_node* compound_parent = nullptr; + + statement_node(compound_statement_node* compound_parent_ = nullptr); + + enum active { expression=0, compound, selection, declaration, return_, iteration, using_, contract, inspect, jump }; + std::variant< + std::unique_ptr<expression_statement_node>, + std::unique_ptr<compound_statement_node>, + std::unique_ptr<selection_statement_node>, + std::unique_ptr<declaration_node>, + std::unique_ptr<return_statement_node>, + std::unique_ptr<iteration_statement_node>, + std::unique_ptr<using_statement_node>, + std::unique_ptr<contract_node>, + std::unique_ptr<inspect_expression_node>, + std::unique_ptr<jump_statement_node> + > statement; + + bool emitted = false; // a note field that's used during lowering to Cpp1 + + bool marked_for_removal = false; // for use during metafunctions which may replace members + + // API + // + auto 
is_expression () const -> bool { return statement.index() == expression; } + auto is_compound () const -> bool { return statement.index() == compound; } + auto is_selection () const -> bool { return statement.index() == selection; } + auto is_declaration() const -> bool { return statement.index() == declaration; } + auto is_return () const -> bool { return statement.index() == return_; } + auto is_iteration () const -> bool { return statement.index() == iteration; } + auto is_using () const -> bool { return statement.index() == using_; } + auto is_contract () const -> bool { return statement.index() == contract; } + auto is_inspect () const -> bool { return statement.index() == inspect; } + auto is_jump () const -> bool { return statement.index() == jump; } + + template<typename Node> + auto get_if() + -> Node* + { + auto pnode = std::get_if<std::unique_ptr<Node>>(&statement); + if (pnode) { + return pnode->get(); + } + // else + return nullptr; + } + + template<typename Node> + auto get_if() const + -> Node const* + { + auto pnode = std::get_if<std::unique_ptr<Node>>(&statement); + if (pnode) { + return pnode->get(); + } + // else + return nullptr; + } + + auto get_lhs_rhs_if_simple_assignment() const + -> assignment_expression_lhs_rhs + { + if (is_expression()) { + return std::get<expression>(statement)->expr->get_lhs_rhs_if_simple_assignment(); + } + // Else + return {}; + } + + auto to_string() const + -> std::string + { + switch (statement.index()) { + break;case expression: + return std::get<expression>(statement)->to_string(); + break;default: + return "(*ERROR*) temporary alpha limitation: type metafunctions cannot stringize expressions that involve initializer statements other than expression-statements"; + } + } + + // Internals + // + auto position() const + -> source_position; + + auto visit(auto& v, int depth) + -> void; +}; + + +auto alternative_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + if (name) { + v.start(*name, 
depth+1); + } + assert (is_as_keyword); + v.start(*is_as_keyword, depth+1); + if (type_id) { + type_id->visit(v, depth+1); + } + else { + assert (value); + value->visit(v, depth+1); + } + assert (statement); + statement->visit(v, depth+1); + v.end(*this, depth); +} + + +auto compound_statement_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + for (auto const& x : statements) { + assert(x); + x->visit(v, depth+1); + } + v.end(*this, depth); +} + + +struct parameter_declaration_node +{ + source_position pos = {}; + passing_style pass = passing_style::in; + int ordinal = 1; + + enum class modifier { none=0, implicit, virtual_, override_, final_ }; + modifier mod = modifier::none; + + std::unique_ptr<declaration_node> declaration; + + // API + // + auto has_name() const + -> bool; + + auto name() const + -> token const*; + + auto has_name(std::string_view) const + -> bool; + + auto direction() const + -> passing_style + { + return pass; + } + + auto is_implicit() const + -> bool + { + return mod == modifier::implicit; + } + + auto is_virtual() const + -> bool + { + return mod == modifier::virtual_; + } + + auto make_virtual() + -> void + { + mod = modifier::virtual_; + } + + auto is_override() const + -> bool + { + return mod == modifier::override_; + } + + auto is_final() const + -> bool + { + return mod == modifier::final_; + } + + auto is_polymorphic() const + -> bool + { + switch (mod) { + break;case modifier::virtual_: + case modifier::override_: + case modifier::final_: + return true; + break;default: + return false; + } + } + + // Internals + // + auto position() const + -> source_position; + + auto visit(auto& v, int depth) + -> void; +}; + + +struct parameter_declaration_list_node +{ + token const* open_paren = {}; + token const* close_paren = {}; + + std::vector<std::unique_ptr<parameter_declaration_node>> parameters; + + // API + // + auto ssize() const -> auto { + return std::ssize(parameters); + } + + auto operator[](int i) + -> 
parameter_declaration_node* + { + return parameters[i].get(); + } + + auto operator[](int i) const + -> parameter_declaration_node const* + { + return parameters[i].get(); + } + + // Internals + // + auto position() const + -> source_position + { + assert(open_paren); + return open_paren->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + for (auto const& x : parameters) { + assert(x); + x->visit(v, depth+1); + } + v.end(*this, depth); + } +}; + + +auto statement_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + if (parameters) { + parameters->visit(v, depth+1); + } + try_visit<expression >(statement, v, depth); + try_visit<compound >(statement, v, depth); + try_visit<selection >(statement, v, depth); + try_visit<declaration>(statement, v, depth); + try_visit<return_ >(statement, v, depth); + try_visit<iteration >(statement, v, depth); + try_visit<contract >(statement, v, depth); + try_visit<inspect >(statement, v, depth); + try_visit<jump >(statement, v, depth); + v.end(*this, depth); +} + + +struct function_returns_tag { }; + +struct function_type_node +{ + declaration_node* my_decl; + + std::unique_ptr<parameter_declaration_list_node> parameters; + bool throws = false; + + struct single_type_id { + std::unique_ptr<type_id_node> type; + passing_style pass = passing_style::move; + }; + + enum active { empty = 0, id, list }; + std::variant< + std::monostate, + single_type_id, + std::unique_ptr<parameter_declaration_list_node> + > returns; + + std::vector<std::unique_ptr<contract_node>> contracts; + + function_type_node(declaration_node* decl); + + // API + // + auto has_postconditions() const + -> bool; + + auto is_function_with_this() const + -> bool; + + auto is_virtual_function() const + -> bool; + + auto make_function_virtual() + -> bool; + + auto is_defaultable() const + -> bool; + + auto is_constructor() const + -> bool; + + auto is_default_constructor() const + -> bool; + + auto is_move() const + 
-> bool; + + auto is_swap() const + -> bool; + + auto is_constructor_with_that() const + -> bool; + + auto is_constructor_with_in_that() const + -> bool; + + auto is_constructor_with_move_that() const + -> bool; + + auto is_comparison() const + -> bool; + + auto is_increment_or_decrement() const + -> bool; + + auto is_compound_assignment() const + -> bool; + + auto is_assignment() const + -> bool; + + auto is_assignment_with_that() const + -> bool; + + auto is_assignment_with_in_that() const + -> bool; + + auto is_assignment_with_move_that() const + -> bool; + + auto is_destructor() const + -> bool; + + auto has_declared_return_type() const + -> bool + { + return returns.index() != empty; + } + + auto has_deduced_return_type() const + -> bool + { + return + returns.index() == empty + || ( + returns.index() == id + && std::get<function_type_node::id>(returns).type->is_wildcard() + ) + ; + } + + auto unnamed_return_type_to_string() const + -> std::string + { + if (auto id = std::get_if<function_type_node::id>(&returns)) { + return (*id).type->to_string(); + } + return {}; + } + + auto has_bool_return_type() const + -> bool + { + if (auto id = std::get_if<function_type_node::id>(&returns)) { + if (auto name = (*id).type->get_token()) { + return *name == "bool"; + } + } + return false; + } + + auto has_non_void_return_type() const + -> bool + { + if (auto id = std::get_if<function_type_node::id>(&returns)) { + if (auto name = (*id).type->get_token()) { + return *name != "void"; + } + } + return returns.index() != empty; + } + + auto parameter_count() const + -> int + { + return std::ssize(parameters->parameters); + } + + auto index_of_parameter_named(std::string_view s) const + -> int + { + auto ret = 0; + for (auto& param : parameters->parameters) { + if (param->has_name(s)) { + return ret; + } + ++ret; + } + return -1; + } + + auto has_parameter_named(std::string_view s) const + -> bool + { + for (auto& param : parameters->parameters) { + if (param->has_name(s)) { + 
return true; + } + } + return false; + } + + auto has_parameter_with_name_and_pass( + std::string_view s, + passing_style pass + ) const + -> bool + { + for (auto& param : parameters->parameters) { + if ( + param->has_name(s) + && param->pass == pass + ) + { + return true; + } + } + return false; + } + + auto first_parameter_name() const + -> std::string; + + auto nth_parameter_type_name(int n) const + -> std::string; + + auto has_in_parameter_named(std::string_view s) const + -> bool + { + return has_parameter_with_name_and_pass(s, passing_style::in); + } + + auto has_out_parameter_named(std::string_view s) const + -> bool + { + return has_parameter_with_name_and_pass(s, passing_style::out); + } + + auto has_move_parameter_named(std::string_view s) const + -> bool + { + return has_parameter_with_name_and_pass(s, passing_style::move); + } + + // Internals + // + auto position() const + -> source_position + { + assert (parameters); + return parameters->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + assert(parameters); + parameters->visit(v, depth+1); + + if (returns.index() == id) { + auto& r = std::get<id>(returns); + assert(r.type); + r.type->visit(v, depth+1); + } + else if (returns.index() == list) { + auto& r = std::get<list>(returns); + assert(r); + // Inform the visitor that this is a returns list + v.start(function_returns_tag{}, depth); + r->visit(v, depth+1); + v.end(function_returns_tag{}, depth); + } + v.end(*this, depth); + } +}; + + +struct type_node +{ + token const* type; + bool final = false; + + type_node( + token const* t, + bool final_ = false + ) + : type{t} + , final{final_} + { } + + // API + // + auto is_final() const + -> bool + { + return final; + } + + auto make_final() + -> void + { + final = true; + } + + // Internals + // + auto position() const + -> source_position + { + assert(type); + return type->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + 
v.end(*this, depth); + } +}; + + +struct namespace_node +{ + token const* namespace_; + + namespace_node(token const* ns) : namespace_{ns} { } + + auto position() const + -> source_position + { + assert(namespace_); + return namespace_->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + v.end(*this, depth); + } +}; + + +struct alias_node +{ + token const* type = {}; + std::unique_ptr<type_id_node> type_id; // for objects + + enum active : std::uint8_t { a_type, a_namespace, an_object }; + std::variant< + std::unique_ptr<type_id_node>, + std::unique_ptr<id_expression_node>, + std::unique_ptr<expression_node> + > initializer; + + alias_node( token const* t ) : type{t} { } + + // API + // + auto is_type_alias () const -> bool + { return initializer.index() == a_type; } + auto is_namespace_alias() const -> bool + { return initializer.index() == a_namespace; } + auto is_object_alias () const -> bool + { return initializer.index() == an_object; } + + // Internals + // + auto position() const + -> source_position + { + assert (type); + return type->position(); + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + + try_visit<a_type >(initializer, v, depth+1); + try_visit<a_namespace>(initializer, v, depth+1); + try_visit<an_object >(initializer, v, depth+1); + + v.end(*this, depth); + } +}; + + +enum class accessibility { default_ = 0, public_, protected_, private_ }; + +auto to_string(accessibility a) + -> std::string +{ + switch (a) { + break;case accessibility::public_ : return "public"; + break;case accessibility::protected_: return "protected"; + break;case accessibility::private_ : return "private"; + break;default: assert(a == accessibility::default_); + } + return "default"; +} + + +struct declaration_identifier_tag { }; + +struct declaration_node +{ + // The capture_group is declared first, because it should outlive + // any owned postfix_expressions that could refer to it + capture_group 
captures; + source_position pos; + bool is_variadic = false; + bool is_constexpr = false; + bool terse_no_equals = false; + std::unique_ptr<unqualified_id_node> identifier; + accessibility access = accessibility::default_; + + enum active : std::uint8_t { a_function, an_object, a_type, a_namespace, an_alias }; + std::variant< + std::unique_ptr<function_type_node>, + std::unique_ptr<type_id_node>, + std::unique_ptr<type_node>, + std::unique_ptr<namespace_node>, + std::unique_ptr<alias_node> + > type; + + std::vector<std::unique_ptr<id_expression_node>> metafunctions; + std::unique_ptr<parameter_declaration_list_node> template_parameters; + source_position requires_pos = {}; + std::unique_ptr<logical_or_expression_node> requires_clause_expression; + + source_position equal_sign = {}; + std::unique_ptr<statement_node> initializer; + + declaration_node* parent_declaration = {}; + statement_node* my_statement = {}; + + // Attributes currently configurable only via metafunction API, + // not directly in the base language grammar + bool member_function_generation = true; + + // Cache some context + bool is_template_parameter = false; + bool is_parameter = false; + + // Constructor + // + declaration_node(declaration_node* parent) + : parent_declaration{parent} + { } + + // API + // + auto type_member_mark_for_removal() + -> bool + { + if (my_statement) { + my_statement->marked_for_removal = true; + return true; + } + return false; + } + + auto type_remove_marked_members() + -> void + { + assert (is_type() && initializer && initializer->is_compound()); + auto compound_stmt = initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + + // Note: This loop is a careful use of the brittle STL "erase" idiom. Do not change this + // loop without carefully ensuring it remains safe against iterator invalidation. + // (Especially don't change this to a for loop with a "++i" iteration-expression.) 
+ auto i = compound_stmt->statements.begin(); + while (i != compound_stmt->statements.end()) + { + if ((*i)->marked_for_removal) { + i = compound_stmt->statements.erase(i); // these two branches ... + } + else { + ++i; // ... must stay together + } + } + } + + auto type_remove_all_members() + -> void + { + assert (is_type() && initializer && initializer->is_compound()); + auto body = initializer->get_if<compound_statement_node>(); + assert (body); + + // Drop all statements in the body, which should self-deregister all our 'captures' + // - (only) statements in the body should have been able to refer to 'captures' + body->statements.clear(); + assert(captures.members.empty()); + } + + auto type_disable_member_function_generation() + -> void + { + member_function_generation = false; + } + + auto object_type() const + -> std::string + { + if (!is_object()) { + return "(*ERROR*) not an object"; + } + // Else + return std::get<an_object>(type)->to_string(); + } + + auto object_initializer() const + -> std::string + { + if (!is_object()) { + return "(*ERROR*) not an object"; + } + else if (initializer) { + return initializer->to_string(); + } + // Else + return ""; + } + + auto get_parent() const + -> declaration_node* + { + return parent_declaration; + } + + auto is_public() const + -> bool + { + return access == accessibility::public_; + } + + auto is_protected() const + -> bool + { + return access == accessibility::protected_; + } + + auto is_private() const + -> bool + { + return access == accessibility::private_; + } + + auto is_default_access() const + -> bool + { + return access == accessibility::default_; + } + +private: + auto set_access(accessibility a) + -> bool + { + if (is_default_access()) { + access = a; + } + return access == a; + } + +public: + auto make_public() + -> bool + { + return set_access( accessibility::public_ ); + } + + auto make_protected() + -> bool + { + return set_access( accessibility::protected_ ); + } + + auto make_private() + -> bool 
+ { + return set_access( accessibility::private_ ); + } + + auto has_name() const + -> bool + { + return + identifier + && identifier->identifier + ; + } + + auto name() const + -> token const* + { + if (!identifier) { + return nullptr; + } + // Else + return identifier->identifier; + } + + auto has_name(std::string_view s) const + -> bool + { + return + has_name() + && *name() == s + ; + } + + auto has_initializer() const + -> bool + { + return initializer != nullptr; + } + + auto parameter_count() const + -> int + { + if (!is_function()) { + return -1; + } + return std::get<a_function>(type)->parameter_count(); + } + + auto index_of_parameter_named(std::string_view s) const + -> int + { + if (!is_function()) { + return -1; + } + return std::get<a_function>(type)->index_of_parameter_named(s); + } + + auto has_parameter_named(std::string_view s) const + -> bool + { + if (!is_function()) { + return false; + } + return std::get<a_function>(type)->has_parameter_named(s); + } + + auto has_in_parameter_named(std::string_view s) const + -> bool + { + if (!is_function()) { + return false; + } + return std::get<a_function>(type)->has_in_parameter_named(s); + } + + auto has_out_parameter_named(std::string_view s) const + -> bool + { + if (!is_function()) { + return false; + } + return std::get<a_function>(type)->has_out_parameter_named(s); + } + + auto has_move_parameter_named(std::string_view s) const + -> bool + { + if (!is_function()) { + return false; + } + return std::get<a_function>(type)->has_move_parameter_named(s); + } + + auto nth_parameter_type_name(int n) const + -> std::string + { + if (!is_function()) { + return ""; + } + return std::get<a_function>(type)->nth_parameter_type_name(n); + } + + auto is_global () const -> bool + { return !parent_declaration; } + + auto is_function () const -> bool + { return type.index() == a_function; } + auto is_object () const -> bool + { return type.index() == an_object; } + auto is_base_object() const -> bool + { return 
is_object() && has_name("this"); } + auto is_member_object() const -> bool + { return is_object() && !has_name("this"); } + auto is_concept () const -> bool + { return type.index() == an_object && get<an_object>(type)->is_concept(); } + auto is_type () const -> bool + { return type.index() == a_type; } + auto is_namespace() const -> bool + { return type.index() == a_namespace; } + auto is_alias() const -> bool + { return type.index() == an_alias; } + + auto is_type_alias () const -> bool + { return is_alias() && std::get<an_alias>(type)->is_type_alias(); } + auto is_namespace_alias() const -> bool + { return is_alias() && std::get<an_alias>(type)->is_namespace_alias(); } + auto is_object_alias () const -> bool + { return is_alias() && std::get<an_alias>(type)->is_object_alias(); } + + auto is_function_expression () const -> bool + { return is_function() && !identifier; } + + auto is_polymorphic() const // has base types or virtual functions + -> bool + { + for (auto& decl : get_type_scope_declarations()) { + if ( + decl->has_name("this") + || decl->is_virtual_function() + ) + { + return true; + } + } + return false; + } + + // Do we know that this cannot be a copy constructible type? 
+ auto cannot_be_a_copy_constructible_type() const + -> bool + { + // If we're not a type, we're not a copyable type + if (!is_type()) { + return true; + } + + // Else if we're letting Cpp1 generate SMFs, we're likely copyable + if (!member_function_generation) { + return false; + } + + // Else if we have a copy constructor, we're copyable + for (auto& decl : get_type_scope_declarations()) + if (decl->is_constructor_with_that()) + { + return false; + } + + // Else there can't be a copy constructor + return true; + } + + auto parent_is_function () const -> bool + { return parent_declaration && parent_declaration->type.index() == a_function; } + auto parent_is_object () const -> bool + { return parent_declaration && parent_declaration->type.index() == an_object; } + auto parent_is_type () const -> bool + { return parent_declaration && parent_declaration->type.index() == a_type; } + auto parent_is_namespace () const -> bool + { return !parent_declaration || parent_declaration->type.index() == a_namespace; } + auto parent_is_alias () const -> bool + { return parent_declaration && parent_declaration->type.index() == an_alias; } + + auto parent_is_type_alias () const -> bool + { return parent_declaration && parent_declaration->is_alias() && std::get<an_alias>(parent_declaration->type)->is_type_alias(); } + auto parent_is_namespace_alias() const -> bool + { return parent_declaration && parent_declaration->is_alias() && std::get<an_alias>(parent_declaration->type)->is_namespace_alias(); } + auto parent_is_object_alias () const -> bool + { return parent_declaration && parent_declaration->is_alias() && std::get<an_alias>(parent_declaration->type)->is_object_alias(); } + + auto is_inside_global_unnamed_function() const -> bool { + auto parent = parent_declaration; + // Get outside all nested function expressions + while (parent && parent->is_function() && !parent->has_name()) { + parent = parent->parent_declaration; + } + return !parent; + } + + auto parent_is_polymorphic() 
const -> bool + { return parent_declaration && parent_declaration->is_polymorphic(); } + + enum which { + functions = 1, + objects = 2, + types = 4, + aliases = 8, + all = functions|objects|types|aliases + }; + +private: + // This helper is a const function that delivers pointers + // to non-const... because this is the best way I can + // think of right now to write the following two get_ + // functions (without duplicating their bodies, and + // without resorting to const_casts) + auto gather_type_scope_declarations(which w) const + -> std::vector<declaration_node*> + { + if ( + !is_type() + || !initializer + || !initializer->is_compound() + ) + { + return {}; + } + + auto compound_stmt = initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + + auto ret = std::vector<declaration_node*>{}; + for (auto& o : compound_stmt->statements) + { + auto decl = o->get_if<declaration_node>(); + if (decl) + { + assert( + !decl->is_namespace() + && "ICE: a type shouldn't be able to contain a namespace" + ); + if ( + (w & functions && decl->is_function()) + || (w & objects && decl->is_object() ) + || (w & types && decl->is_type() ) + || (w & aliases && decl->is_alias() ) + ) + { + ret.push_back(decl); + } + } + } + + return ret; + } + +public: + auto get_type_scope_declarations(which w = all) + -> std::vector<declaration_node*> + { + // Only want to return the gather_ results as + // non-const* in a non-const function + return gather_type_scope_declarations(w); + } + + auto get_type_scope_declarations(which w = all) const + -> std::vector<declaration_node const*> + { + // Convert the gather_ results to const* + auto tmp = gather_type_scope_declarations(w); + return std::vector<declaration_node const*>(tmp.begin(), tmp.end()); + } + + + auto add_type_member( std::unique_ptr<statement_node>&& statement ) + -> bool + { + if ( + !is_type() + || !initializer + || !initializer->is_compound() + || !statement->is_declaration() + ) + { + return false; + } + + // Tell 
this declaration statement that we are its new parent + // and check to ensure that it doesn't already have a parent + // (that shouldn't happen because we should only get here for a + // generated statement that hasn't been added elsewhere yet) + auto decl = statement->get_if<declaration_node>(); + assert( + decl + && !decl->parent_declaration + ); + decl->parent_declaration = this; + + // And actually adopt it into our list of statements + auto compound_stmt = initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + compound_stmt->statements.push_back(std::move(statement)); + return true; + } + + + auto add_function_initializer( std::unique_ptr<statement_node>&& statement ) + -> bool + { + if ( + !is_function() + || initializer + ) + { + return false; + } + + // Adopt it as our initializer statement + initializer = std::move( statement ); + return true; + } + + + auto get_decl_if_type_scope_object_name_before_a_base_type( std::string_view s ) const + -> declaration_node const* + { + declaration_node const* ret = {}; + + // If it's 'this' then it can't be an object name + if (s == "this") { + return {}; + } + + // Navigate to the nearest enclosing type + auto decl = this; + while ( + !decl->is_type() + && decl->parent_declaration + ) + { + decl = decl->parent_declaration; + } + + if (!decl->is_type()) { + return {}; + } + + // Look for a name match and if so remember the type, + // and look for a base type after that match + auto objects = decl->get_type_scope_declarations(); + auto found_name = false; + auto found_later_base_type = false; + + for (auto& o : objects) { + if (o->is_alias()) { + continue; + } + if (o->has_name(s)) { + found_name = true; + ret = o; + } + if (o->has_name("this")) { + if (found_name) { + found_later_base_type = true; + break; + } + } + } + + // If we didn't find a later base type, discard any name match + if (!found_later_base_type) { + ret = {}; + } + + return ret; + } + + + auto get_initializer_statements() const + 
-> std::vector<statement_node*> + { + if (!initializer) { + return {}; + } + + auto ret = std::vector<statement_node*>{}; + // For non-compound initializers, we want just that statement + if (!initializer->is_compound()) + { + ret.push_back(initializer.get()); + } + + // Else for compound initializers, we want the compound_statement's statements + else + { + auto compound_stmt = initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + for (auto& o : compound_stmt->statements) { + ret.push_back(o.get()); + } + } + + return ret; + } + + auto is_function_with_this() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_function_with_this(); + } + // else + return false; + } + + auto is_virtual_function() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_virtual_function(); + } + // else + return false; + } + + auto is_type_final() const + -> bool + { + if (auto t = std::get_if<a_type>(&type)) { + return (*t)->is_final(); + } + // else + return false; + } + + auto make_type_final() + -> bool + { + if (auto t = std::get_if<a_type>(&type)) { + (*t)->make_final(); + return true; + } + // else + return false; + } + + auto make_function_virtual() + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->make_function_virtual(); + } + // else + return false; + } + + auto is_defaultable_function() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_defaultable(); + } + // else + return false; + } + + auto is_constructor() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_constructor(); + } + // else + return false; + } + + auto is_default_constructor() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_default_constructor(); + } + // else + return false; + } + + auto is_move() const + -> bool + { + if (auto func = 
std::get_if<a_function>(&type)) { + return (*func)->is_move(); + } + // else + return false; + } + + auto is_swap() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_swap(); + } + // else + return false; + } + + auto is_constructor_with_that() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_constructor_with_that(); + } + // else + return false; + } + + auto is_constructor_with_in_that() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_constructor_with_in_that(); + } + // else + return false; + } + + auto is_constructor_with_move_that() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_constructor_with_move_that(); + } + // else + return false; + } + + auto is_comparison() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_comparison(); + } + // else + return false; + } + + auto is_increment_or_decrement() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_increment_or_decrement(); + } + // else + return false; + } + + auto is_compound_assignment() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_compound_assignment(); + } + // else + return false; + } + + auto is_assignment() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_assignment(); + } + // else + return false; + } + + auto is_assignment_with_that() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_assignment_with_that(); + } + // else + return false; + } + + auto is_assignment_with_in_that() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_assignment_with_in_that(); + } + // else + return false; + } + + auto is_assignment_with_move_that() const + -> bool + { + if (auto func = 
std::get_if<a_function>(&type)) { + return (*func)->is_assignment_with_move_that(); + } + // else + return false; + } + + struct declared_value_set_funcs { + declaration_node const* out_this_in_that = {}; + declaration_node const* out_this_move_that = {}; + declaration_node const* inout_this_in_that = {}; + declaration_node const* inout_this_move_that = {}; + std::vector<std::string> assignments_from = {}; + }; + + auto find_declared_value_set_functions() const + -> declared_value_set_funcs + { + if (!initializer) { + return {}; + } + + auto compound_stmt = initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + + auto ret = declared_value_set_funcs{}; + for (auto& o : compound_stmt->statements) + { + auto decl = o->get_if<declaration_node>(); + if (decl) + { + if (decl->is_constructor_with_in_that()) { + ret.out_this_in_that = decl; + } + if (decl->is_constructor_with_move_that()) { + ret.out_this_move_that = decl; + } + if (decl->is_assignment_with_in_that()) { + ret.inout_this_in_that = decl; + } + if (decl->is_assignment_with_move_that()) { + ret.inout_this_move_that = decl; + } + if (decl->is_assignment() && !decl->is_assignment_with_that()) { + ret.assignments_from.emplace_back( decl->nth_parameter_type_name(2) ); + } + } + } + + return ret; + } + + auto find_parent_declared_value_set_functions() const + -> declared_value_set_funcs + { + if (parent_is_type()) { + return parent_declaration->find_declared_value_set_functions(); + } + // else + return {}; + } + + + auto is_destructor() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->is_destructor(); + } + // else + return false; + } + + auto has_declared_return_type() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->has_declared_return_type(); + } + // else + return false; + } + + auto has_deduced_return_type() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return 
(*func)->has_deduced_return_type(); + } + // else + return false; + } + + auto get_function_parameters() + -> std::vector<parameter_declaration_node const*> + { + if (!is_function()) { + return {}; + } + // else + auto ret = std::vector<parameter_declaration_node const*>{}; + for (auto& param : std::get<a_function>(type)->parameters->parameters) { + ret.push_back( param.get() ); + } + return ret; + } + + auto unnamed_return_type_to_string() const + -> std::string + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->unnamed_return_type_to_string(); + } + // else + return {}; + } + + auto has_bool_return_type() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->has_bool_return_type(); + } + // else + return false; + } + + auto has_non_void_return_type() const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->has_non_void_return_type(); + } + // else + return false; + } + + auto has_parameter_with_name_and_pass( + std::string_view s, + passing_style pass + ) const + -> bool + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->has_parameter_with_name_and_pass(s, pass); + } + // else + return false; + } + + auto first_parameter_name() const + -> std::string + { + if (auto func = std::get_if<a_function>(&type)) { + return (*func)->first_parameter_name(); + } + // else + return ""; + } + + auto is_binary_comparison_function() const + -> bool + { + return + is_function() + && ( + has_name("operator==") + || has_name("operator!=") + || has_name("operator<") + || has_name("operator<=") + || has_name("operator>") + || has_name("operator>=") + ); + } + + auto is_const() const + -> bool + { + return + type.index() == an_object + && !std::get<an_object>(type)->pc_qualifiers.empty() + && *std::get<an_object>(type)->pc_qualifiers.front() == "const" + ; + } + + auto has_wildcard_type() const + -> bool + { + return + type.index() == an_object + && 
std::get<an_object>(type)->is_wildcard() + ; + } + + auto get_object_type() const + -> type_id_node const* + { + if (type.index() == an_object) { + return std::get<an_object>(type).get(); + } + // Else + return {}; + } + + // Internals + // + auto position() const + -> source_position + { + if (identifier) { + return identifier->position(); + } + return pos; + } + + auto visit(auto& v, int depth) + -> void + { + v.start(*this, depth); + + v.start(declaration_identifier_tag{}, depth); + if (identifier) { + identifier->visit(v, depth+1); + } + v.end(declaration_identifier_tag{}, depth); + + try_visit<a_function >(type, v, depth+1); + try_visit<an_object >(type, v, depth+1); + try_visit<a_type >(type, v, depth+1); + try_visit<a_namespace>(type, v, depth+1); + try_visit<an_alias >(type, v, depth+1); + + for (auto& m : metafunctions) { + assert(m); + m->visit(v, depth+1); + } + + if (initializer) { + initializer->visit(v, depth+1); + } + + v.end(*this, depth); + } +}; + + +compound_statement_node::compound_statement_node(source_position o) + : open_brace{o} +{ } + + +statement_node::statement_node(compound_statement_node* compound_parent_) + : compound_parent{ compound_parent_ } +{ } + + +function_type_node::function_type_node(declaration_node* decl) + : my_decl{decl} +{ } + + +auto parameter_declaration_node::has_name() const + -> bool +{ + return declaration->has_name(); +} + + +auto parameter_declaration_node::name() const + -> token const* +{ + return declaration->name(); +} + + +auto parameter_declaration_node::has_name(std::string_view s) const + -> bool +{ + return declaration->has_name(s); +} + + +auto function_type_node::first_parameter_name() const + -> std::string +{ + if (std::ssize(parameters->parameters) > 0) + { + assert (parameters->parameters[0]->declaration->name()); + return parameters->parameters[0]->declaration->name()->to_string(); + } + // Else + return ""; +} + +auto function_type_node::nth_parameter_type_name(int n) const + -> std::string +{ + 
if (std::ssize(parameters->parameters) >= n) + { + return parameters->parameters[n-1]->declaration->get_object_type()->to_string(); + } + // Else + return ""; +} + + +auto function_type_node::has_postconditions() const + -> bool +{ + return + std::find_if( + contracts.begin(), + contracts.end(), + [](auto const& e){ return *e->kind == "post"; } + ) != contracts.end(); +} + +auto function_type_node::is_function_with_this() const + -> bool +{ + if ( + (*parameters).ssize() > 0 + && (*parameters)[0]->has_name("this") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_virtual_function() const + -> bool +{ + if ( + (*parameters).ssize() > 0 + && (*parameters)[0]->has_name("this") + && (*parameters)[0]->is_virtual() + ) + { + return true; + } + return false; +} + + +auto function_type_node::make_function_virtual() + -> bool +{ + if (is_function_with_this()) { + (*parameters)[0]->make_virtual(); + return true; + } + return false; +} + + +auto function_type_node::is_defaultable() const + -> bool +{ + if ( + my_decl->has_name("operator==") + || my_decl->has_name("operator<=>") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_constructor() const + -> bool +{ + if ( + (*parameters).ssize() > 0 + && (*parameters)[0]->has_name("this") + && (*parameters)[0]->direction() == passing_style::out + ) + { + assert(my_decl->has_name("operator=")); + return true; + } + return false; +} + + +auto function_type_node::is_default_constructor() const + -> bool +{ + if ( + is_constructor() + && (*parameters).ssize() == 1 + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_move() const + -> bool +{ + if ( + (is_constructor() || is_assignment()) + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + && (*parameters)[1]->direction() == passing_style::move + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_swap() const + -> bool +{ + assert (my_decl); + if ( + 
my_decl->has_name("swap") + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_constructor_with_that() const + -> bool +{ + if ( + is_constructor() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_assignment_with_that() const + -> bool +{ + if ( + is_assignment() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_constructor_with_in_that() const + -> bool +{ + if ( + is_constructor() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + && (*parameters)[1]->direction() == passing_style::in + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_constructor_with_move_that() const + -> bool +{ + if ( + is_constructor() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + && (*parameters)[1]->direction() == passing_style::move + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_comparison() const + -> bool +{ + if ( + ( + my_decl->has_name("operator==") + || my_decl->has_name("operator!=") + || my_decl->has_name("operator<") + || my_decl->has_name("operator<=") + || my_decl->has_name("operator>") + || my_decl->has_name("operator>=") + || my_decl->has_name("operator<=>") + ) + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_increment_or_decrement() const + -> bool +{ + if ( + my_decl->has_name("operator++") + || my_decl->has_name("operator--") + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_compound_assignment() const + -> bool +{ + if ( + ( + my_decl->has_name("operator+=") + || my_decl->has_name("operator-=") + || my_decl->has_name("operator*=") + || my_decl->has_name("operator/=") + || my_decl->has_name("operator%=") + || 
my_decl->has_name("operator&=") + || my_decl->has_name("operator|=") + || my_decl->has_name("operator^=") + || my_decl->has_name("operator<<=") + || my_decl->has_name("operator>>=") + ) + && (*parameters).ssize() > 1 + && (*parameters)[0]->has_name("this") + && (*parameters)[0]->direction() == passing_style::inout + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_assignment() const + -> bool +{ + if ( + my_decl->has_name("operator=") + && (*parameters).ssize() > 1 + && (*parameters)[0]->has_name("this") + && (*parameters)[0]->direction() == passing_style::inout + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_assignment_with_in_that() const + -> bool +{ + if ( + is_assignment() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + && (*parameters)[1]->direction() == passing_style::in + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_assignment_with_move_that() const + -> bool +{ + if ( + is_assignment() + && (*parameters).ssize() == 2 + && (*parameters)[1]->has_name("that") + && (*parameters)[1]->direction() == passing_style::move + ) + { + return true; + } + return false; +} + + +auto function_type_node::is_destructor() const + -> bool +{ + if ( + my_decl->has_name("operator=") + && (*parameters).ssize() == 1 + && (*parameters)[0]->has_name("this") + && (*parameters)[0]->direction() == passing_style::move + ) + { + return true; + } + return false; +} + + +auto primary_expression_node::template_arguments() const + -> std::vector<template_argument> const& +{ + if (expr.index() == id_expression) { + return std::get<id_expression>(expr)->template_arguments(); + } + // else + return no_template_args; +} + + +auto primary_expression_node::get_token() const + -> token const* +{ + if (expr.index() == identifier) { + return std::get<identifier>(expr); + } + else if (expr.index() == id_expression) { + return std::get<id_expression>(expr)->get_token(); + } + else if 
(expr.index() == literal) { + return std::get<literal>(expr)->get_token(); + } + // else (because we're deliberately ignoring the other + // options which are more than a single token) + return {}; +} + + +auto primary_expression_node::to_string() const + -> std::string +{ + switch (expr.index()) + { + break;case empty: + return {}; + + break;case identifier: { + auto const& s = std::get<identifier>(expr); + assert (s); + return s->to_string(); + } + + break;case id_expression: { + auto const& s = std::get<id_expression>(expr); + assert (s); + return s->to_string(); + } + + break;case literal: { + auto const& i = std::get<literal>(expr); + assert (i); + return i->to_string(); + } + + break;default: + return "(*ERROR*) temporary alpha limitation: type metafunctions cannot stringize expressions that involve nested expression-lists, declarations, or inspect expressions"; + } +} + + +auto primary_expression_node::position() const + -> source_position +{ + switch (expr.index()) + { + break;case empty: + return { 0, 0 }; + + break;case identifier: { + auto const& s = std::get<identifier>(expr); + assert (s); + return s->position(); + } + + break;case expression_list: { + auto const& s = std::get<expression_list>(expr); + assert (s); + return s->position(); + } + + break;case id_expression: { + auto const& s = std::get<id_expression>(expr); + assert (s); + return s->position(); + } + + break;case declaration: { + auto const& s = std::get<declaration>(expr); + assert (s); + return s->position(); + } + + break;case inspect: { + auto const& i = std::get<inspect>(expr); + assert (i); + return i->position(); + } + + break;case literal: { + auto const& i = std::get<literal>(expr); + assert (i); + return i->position(); + } + + break;default: + assert (!"illegal primary_expression_node state"); + return { 0, 0 }; + } +} + + +auto primary_expression_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + try_visit<identifier >(expr, v, depth); + 
try_visit<expression_list>(expr, v, depth); + try_visit<id_expression >(expr, v, depth); + try_visit<declaration >(expr, v, depth); + try_visit<inspect >(expr, v, depth); + try_visit<literal >(expr, v, depth); + v.end(*this, depth); +} + + +struct next_expression_tag { }; +struct loop_body_tag { token const* identifier; }; + +auto iteration_statement_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + if (label) { + label->visit(v, depth+1); + } + if (identifier) { + identifier->visit(v, depth+1); + } + if (statements) { + statements->visit(v, depth+1); + } + if (next_expression) { + v.start(next_expression_tag{}, depth); + next_expression->visit(v, depth+1); + v.end(next_expression_tag{}, depth); + } + if (condition) { + assert(!range && !body); + condition->visit(v, depth+1); + } + else { + assert(range && parameter && body); + range->visit(v, depth+1); + v.start(loop_body_tag{identifier}, depth); + parameter->visit(v, depth+1); + body->visit(v, depth+1); + } + v.end(*this, depth); +} + + +auto statement_node::position() const + -> source_position +{ + switch (statement.index()) + { + break;case expression: { + auto const& s = std::get<expression>(statement); + assert (s); + return s->position(); + } + + break;case compound: { + auto const& s = std::get<compound>(statement); + assert (s); + return s->position(); + } + + break;case selection: { + auto const& s = std::get<selection>(statement); + assert (s); + return s->position(); + } + + break;case declaration: { + auto const& s = std::get<declaration>(statement); + assert (s); + return s->position(); + } + + break;case return_: { + auto const& s = std::get<return_>(statement); + assert (s); + return s->position(); + } + + break;case iteration: { + auto const& s = std::get<iteration>(statement); + assert (s); + return s->position(); + } + + break;case using_: { + auto const& s = std::get<using_>(statement); + assert (s); + return s->position(); + } + + break;case contract: { + auto const& s = 
std::get<contract>(statement); + assert (s); + return s->position(); + } + + break;case inspect: { + auto const& s = std::get<inspect>(statement); + assert (s); + return s->position(); + } + + break;case jump: { + auto const& s = std::get<jump>(statement); + assert (s); + return s->position(); + } + + break;default: + assert (!"illegal statement_node state"); + return { 0, 0 }; + } +} + + +auto parameter_declaration_node::position() const + -> source_position +{ + assert (declaration); + return pos; +} + + +auto parameter_declaration_node::visit(auto& v, int depth) + -> void +{ + v.start(*this, depth); + assert(declaration); + declaration->visit(v, depth + 1); + v.end(*this, depth); +} + + +struct translation_unit_node +{ + std::vector< std::unique_ptr<declaration_node> > declarations; + + auto position() const -> source_position + { + if (std::ssize(declarations) > 0) { + return declarations.front()->position(); + } + return {}; + } + + auto visit(auto& v, int depth) -> void + { + v.start(*this, depth); + for (auto const& x : declarations) { + assert(x); + x->visit(v, depth + 1); + } + v.end(*this, depth); + } +}; + + +//----------------------------------------------------------------------- +// +// pretty_print_visualize: pretty-prints Cpp2 ASTs +// +//----------------------------------------------------------------------- +// +auto pretty_print_visualize(token const& n, int indent) + -> std::string; +auto pretty_print_visualize(primary_expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(literal_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(prefix_expression_node const& n, int indent) + -> std::string; +template< + String Name, + typename Term +> +auto pretty_print_visualize(binary_expression_node<Name,Term> const& n, int indent) + -> std::string; +auto pretty_print_visualize(expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(expression_list_node const& n, int indent) + 
-> std::string; +auto pretty_print_visualize(expression_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(postfix_expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(unqualified_id_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(qualified_id_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(type_id_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(is_as_expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(id_expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(compound_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(selection_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(iteration_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(return_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(alternative_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(inspect_expression_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(contract_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(jump_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(using_statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(statement_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(parameter_declaration_node const& n, int indent, bool is_template_param = false) + -> std::string; +auto pretty_print_visualize(parameter_declaration_list_node const& n, int indent, bool is_template_param_list = false) + -> std::string; +auto pretty_print_visualize(function_type_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(type_node const& n, int indent) + -> std::string; +auto 
pretty_print_visualize(namespace_node const& n, int indent) + -> std::string; +auto pretty_print_visualize(declaration_node const& n, int indent, bool include_metafunctions_list = false) + -> std::string; + + + +//----------------------------------------------------------------------- +// pre: Get an indentation prefix +// +inline static int indent_spaces = 2; +inline static std::string indent_str = std::string( 1024, ' ' ); // "1K should be enough for everyone" + +auto pre(int indent) + -> std::string_view +{ + assert (indent >= 0); + return { + indent_str.c_str(), + as<size_t>( std::min( indent*indent_spaces, _as<int>(std::ssize(indent_str))) ) + }; +} + + +//----------------------------------------------------------------------- +// try_pretty_print_visualize +// +// Helper to emit whatever is in a variant where each +// alternative is a smart pointer +// +template <int I> +auto try_pretty_print_visualize( + auto& v, + auto&&... more +) + -> std::string +{ + if (v.index() == I) { + auto const& alt = std::get<I>(v); + assert (alt); + return pretty_print_visualize (*alt, CPP2_FORWARD(more)...); + } + return ""; +} + + +auto pretty_print_visualize(token const& t, int) + -> std::string +{ + return t.to_string(); +} + + +auto pretty_print_visualize(primary_expression_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + + ret += try_pretty_print_visualize<primary_expression_node::identifier >(n.expr, indent); + ret += try_pretty_print_visualize<primary_expression_node::expression_list>(n.expr, indent); + ret += try_pretty_print_visualize<primary_expression_node::id_expression >(n.expr, indent); + ret += try_pretty_print_visualize<primary_expression_node::declaration >(n.expr, indent); + ret += try_pretty_print_visualize<primary_expression_node::inspect >(n.expr, indent); + ret += try_pretty_print_visualize<primary_expression_node::literal >(n.expr, indent); + + return ret; +} + + +auto pretty_print_visualize(literal_node const& n, int) + -> 
std::string +{ + // TODO: This is an initial visualizer implementation, and still + // skips a few rarer things (such as raw string literals) + + assert(n.literal); + + auto ret = n.literal->to_string(); + + if (n.user_defined_suffix) { + ret += n.user_defined_suffix->as_string_view(); + } + + return ret; +} + + +auto pretty_print_visualize(prefix_expression_node const& n, int indent) + -> std::string +{ + assert(n.expr); + + auto ret = std::string{}; + + for (auto& op : n.ops) { + assert(op); + ret += op->as_string_view(); + } + + ret += pretty_print_visualize(*n.expr, indent); + + return ret; +} + + +template< + String Name, + typename Term +> +auto pretty_print_visualize(binary_expression_node<Name,Term> const& n, int indent) + -> std::string +{ + assert(n.expr); + + auto ret = pretty_print_visualize(*n.expr, indent); + for (auto& term : n.terms) { + assert(term.op && term.expr); + ret += " " + term.op->to_string() + + " " + pretty_print_visualize(*term.expr, indent); + } + return ret; +} + + +auto pretty_print_visualize(expression_node const& n, int indent) + -> std::string +{ + assert(n.expr); + return pretty_print_visualize(*n.expr, indent); +} + + +auto pretty_print_visualize(expression_list_node const& n, int indent) + -> std::string +{ + assert(n.open_paren && n.close_paren); + + auto ret = n.open_paren->to_string(); + + for (auto i = 0; auto& expr : n.expressions) { + assert(expr.expr); + if ( + expr.pass == passing_style::out + || expr.pass == passing_style::move + || expr.pass == passing_style::forward + ) + { + ret += to_string_view(expr.pass) + std::string{" "}; + } + ret += pretty_print_visualize(*expr.expr, indent); + if (++i < std::ssize(n.expressions)) { + ret += ", "; + } + } + + ret += n.close_paren->as_string_view(); + + return ret; +} + + +auto pretty_print_visualize(expression_statement_node const& n, int indent) + -> std::string +{ + assert(n.expr); + + auto ret = pretty_print_visualize(*n.expr, indent); + + if (n.has_semicolon && ret.back() 
!= ';') { + ret += ";"; + } + + return ret; +} + + +auto pretty_print_visualize(postfix_expression_node const& n, int indent) + -> std::string +{ + assert(n.expr); + + auto ret = pretty_print_visualize(*n.expr, indent); + + for (auto& op : n.ops) + { + assert(op.op); + if (op.expr_list) { + assert (op.op_close); + ret += pretty_print_visualize(*op.expr_list, indent); + } + else { + ret += op.op->as_string_view(); + if (op.id_expr) { + ret += pretty_print_visualize(*op.id_expr, indent); + } + } + } + + return ret; +} + + +auto pretty_print_visualize(unqualified_id_node const& n, int indent) + -> std::string +{ + assert(n.identifier); + + auto ret = n.identifier->to_string(); + + if (n.open_angle != source_position{}) + { + ret += "<"; + for (bool first = true; auto& arg : n.template_args) + { + if (!first) { + ret += ", "; + } + first = false; + ret += try_pretty_print_visualize<template_argument::expression>(arg.arg, indent); + ret += try_pretty_print_visualize<template_argument::type_id >(arg.arg, indent); + } + ret += ">"; + } + + return ret; +} + + +auto pretty_print_visualize(qualified_id_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + + for (auto& id : n.ids) { + if (id.scope_op) { ret += id.scope_op->as_string_view(); } + assert (id.id); + ret += pretty_print_visualize(*id.id, indent); + } + + return ret; +} + + +auto pretty_print_visualize(type_id_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + + for (auto& qual : n.pc_qualifiers) { + assert(qual); + ret += qual->as_string_view(); + ret += " "; + } + + if (n.id.index() == type_id_node::empty) { ret += "_"; } + ret += try_pretty_print_visualize<type_id_node::qualified >(n.id, indent); + ret += try_pretty_print_visualize<type_id_node::unqualified>(n.id, indent); + ret += try_pretty_print_visualize<type_id_node::keyword >(n.id, indent); + + return ret; +} + + +auto pretty_print_visualize(is_as_expression_node const& n, int indent) + -> std::string +{ + 
assert (n.expr); + + auto ret = pretty_print_visualize(*n.expr, indent); + + for (auto& op : n.ops) { + if (op.op) { ret += " " + op.op->to_string() + " "; } + if (op.type) { ret += pretty_print_visualize(*op.type, indent); } + if (op.expr) { ret += pretty_print_visualize(*op.expr, indent); } + } + + return ret; +} + + +auto pretty_print_visualize(id_expression_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + + ret += try_pretty_print_visualize<id_expression_node::qualified >(n.id, indent); + ret += try_pretty_print_visualize<id_expression_node::unqualified>(n.id, indent); + + return ret; +} + + +auto pretty_print_visualize(compound_statement_node const& n, int indent) + -> std::string +{ + auto ret = std::string{"\n"} + pre(indent) + "{"; + + for (auto& stmt : n.statements) { + assert (stmt); + ret += pretty_print_visualize(*stmt, indent+1); + } + + ret += std::string{"\n"} + pre(indent) + "}"; + + return ret; +} + + +auto pretty_print_visualize(selection_statement_node const& n, int indent) + -> std::string +{ + assert (n.identifier && n.expression && n.true_branch && n.false_branch); + + auto ret = std::string{}; + + ret += std::string{"\n"} + pre(indent) + n.identifier->as_string_view() + " "; + + if (n.is_constexpr) { + ret += "constexpr "; + } + + ret += pretty_print_visualize(*n.expression, indent) + + pretty_print_visualize(*n.true_branch, indent); + + if (n.has_source_false_branch) { + ret += std::string{"\n"} + pre(indent) + "else " + + pretty_print_visualize(*n.false_branch, indent); + } + + return ret; +} + + +auto pretty_print_visualize(iteration_statement_node const& n, int indent) + -> std::string +{ + // First compute the common parts + + auto next_expr = std::string{}; + if (n.next_expression) { + next_expr += std::string{"\n"} + pre(indent) + "next " + pretty_print_visualize(*n.next_expression, indent); + } + + auto stmts = std::string{}; + if (n.statements) { + stmts += pretty_print_visualize(*n.statements, indent+1); 
+ } + + // Then slot them in where appropriate + + auto ret = std::string{}; + assert (n.identifier); + + ret += std::string{"\n"} + pre(indent); + if (n.label) { + ret += n.label->to_string() + + ": "; + } + + if (*n.identifier == "while") { + assert (n.condition); + ret += "while " + + pretty_print_visualize(*n.condition, indent) + next_expr + stmts; + } + else if (*n.identifier == "do") { + assert (n.condition); + ret += "do " + + stmts + + next_expr + + "\n" + pre(indent) + "while " + + pretty_print_visualize(*n.condition, indent); + if (ret.back() != ';') { + ret += ";"; + } + } + else { + assert (n.range && n.parameter && n.body); + ret += "for " + + pretty_print_visualize(*n.range, indent) + + next_expr + + "\n" + pre(indent) + "do (" + pretty_print_visualize(*n.parameter, indent + 1) + ")" + + pretty_print_visualize(*n.body, indent+1); + } + + return ret; +} + + +auto pretty_print_visualize(return_statement_node const& n, int indent) + -> std::string +{ + auto ret = std::string{"\n"} + pre(indent) + "return"; + + if (n.expression) { + ret += " " + pretty_print_visualize(*n.expression, indent); + } + + if (ret.back() != ';') { + ret += ";"; + } + + return ret; +} + + +auto pretty_print_visualize(alternative_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + assert (n.is_as_keyword); + ret += std::string{"\n"} + pre(indent); + if (n.name) { + ret += pretty_print_visualize(*n.name, indent) + ": "; + } + ret += n.is_as_keyword->as_string_view(); + if (n.type_id) { + ret += " " + pretty_print_visualize(*n.type_id, indent); + } + if (n.value) { + ret += " " + pretty_print_visualize(*n.value, indent); + } + ret += " = " + pretty_print_visualize(*n.statement, indent+1); + return ret; +} + + +auto pretty_print_visualize(inspect_expression_node const& n, int indent) + -> std::string +{ + assert (n.expression); + + auto ret = std::string{"inspect"}; + + if (n.is_constexpr) { + ret += " constexpr"; + } + + ret += " " + 
pretty_print_visualize(*n.expression, indent); + + if (n.result_type) { + ret += " -> " + pretty_print_visualize(*n.result_type, indent); + } + + ret += " {"; + + for (auto& alt : n.alternatives) { + assert(alt); + ret += pretty_print_visualize(*alt, indent+1); + } + + ret += std::string{"\n"} + pre(indent) + "}"; + + return ret; +} + + +auto pretty_print_visualize(contract_node const& n, int indent) + -> std::string +{ + assert (n.kind && n.condition); + + auto ret = std::string{"\n"} + pre(indent) + n.kind->as_string_view(); + + if (n.group) { + ret += "<" + pretty_print_visualize(*n.group, indent); + for (auto const& flag : n.flags) { + ret += "," + pretty_print_visualize(*flag, indent); + } + ret += ">"; + } + + ret += "( " + pretty_print_visualize(*n.condition, indent); + + if (n.message) { + ret += ", " + pretty_print_visualize(*n.message, indent); + } + + ret += " )"; + + if (*n.kind == "assert" && ret.back() != ';') { + ret += ";"; + } + + return ret; +} + + +auto pretty_print_visualize(jump_statement_node const& n, int indent) + -> std::string +{ + assert (n.keyword); + + auto ret = std::string{"\n"} + pre(indent) + n.keyword->as_string_view(); + + if (n.label) { + ret += " " + n.label->to_string(); + } + + if (ret.back() != ';') { + ret += ";"; + } + + return ret; +} + + +auto pretty_print_visualize(using_statement_node const& n, int indent) + -> std::string +{ + assert (n.keyword); + + auto ret = std::string{"\n"} + pre(indent) + n.keyword->as_string_view() + " "; + + if (n.for_namespace) { + ret += "namespace "; + } + + ret += pretty_print_visualize(*n.id, indent); + if (ret.back() != ';') { + ret += ";"; + } + + return ret; +} + + +auto pretty_print_visualize(statement_node const& n, int indent) + -> std::string +{ + auto ret = std::string{}; + + if (n.is_expression()) + { + if (n.compound_parent) { + ret += std::string{"\n"} + pre(indent); + } + auto& expr = std::get<statement_node::expression>(n.statement); + assert (expr); + ret += 
pretty_print_visualize(*expr, indent); + } + else + { + if (n.parameters) { + ret += std::string{"\n"} + pre(indent) + pretty_print_visualize(*n.parameters, indent); + } + + ret += try_pretty_print_visualize<statement_node::compound >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::selection >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::declaration>(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::return_ >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::iteration >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::using_ >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::contract >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::inspect >(n.statement, indent); + ret += try_pretty_print_visualize<statement_node::jump >(n.statement, indent); + } + + return ret; +} + + +auto pretty_print_visualize(parameter_declaration_node const& n, int indent, bool is_template_param_list /* = false */ ) + -> std::string +{ + assert (n.declaration); + + auto ret = std::string{}; + + if (!is_template_param_list) { + switch (n.mod) { + break;case parameter_declaration_node::modifier::implicit : ret += "implicit "; + break;case parameter_declaration_node::modifier::virtual_ : ret += "virtual "; + break;case parameter_declaration_node::modifier::override_: ret += "override "; + break;case parameter_declaration_node::modifier::final_ : ret += "final "; + break;default: ; // none + } + + ret += to_string_view(n.pass); + ret += " "; + } + + ret += pretty_print_visualize(*n.declaration, indent); + + return ret; +} + + +auto pretty_print_visualize(parameter_declaration_list_node const& n, int indent, bool is_template_param_list /* = false */) + -> std::string +{ + assert(n.open_paren && n.close_paren); + + auto ret = n.open_paren->to_string(); + + auto space = std::string{}; + if (std::ssize(n.parameters) > 1) { 
+ space += std::string{"\n"} + pre(indent+1); + } + + for (auto i = 0; auto& param : n.parameters) { + ret += space + pretty_print_visualize(*param, indent+1, is_template_param_list); + if (++i < std::ssize(n.parameters)) { + ret += ", "; + } + } + + if (std::ssize(n.parameters) > 1) { + ret += std::string{"\n"} + pre(indent); + } + ret += n.close_paren->to_string(); + + return ret; +} + + +auto pretty_print_visualize(function_type_node const& n, int indent) + -> std::string +{ + assert (n.parameters); + + auto ret = pretty_print_visualize(*n.parameters, indent); + + if (n.throws) { + ret += " throws"; + } + + if (n.has_non_void_return_type()) { + ret += " -> "; + ret += try_pretty_print_visualize<function_type_node::list>(n.returns, indent+1); + if (n.returns.index() == function_type_node::id) { + auto& single = std::get<function_type_node::id>(n.returns); + ret += to_string_view(single.pass) + + std::string{" "} + pretty_print_visualize(*single.type, indent+1); + } + } + + for (auto& contract: n.contracts) { + assert(contract); + ret += pretty_print_visualize(*contract, indent+1); + } + + return ret; +} + + +auto pretty_print_visualize(type_node const& n) + -> std::string +{ + assert (n.type); + + auto ret = std::string{}; + + if (n.final) { + ret += "final "; + } + + ret += "type"; + + return ret; +} + + +auto pretty_print_visualize(namespace_node const&) + -> std::string +{ + return "namespace"; +} + + +auto pretty_print_visualize(declaration_node const& n, int indent, bool include_metafunctions_list /* = false */ ) + -> std::string +{ + indent_spaces = 4; + + // First compute the common parts + + auto metafunctions = std::string{}; + if (include_metafunctions_list) { + for (auto& meta : n.metafunctions) { + metafunctions += " @" + pretty_print_visualize(*meta, indent); + } + } + + auto template_params = std::string{}; + if (n.template_parameters) { + template_params += " " + pretty_print_visualize(*n.template_parameters, indent + 1, true); + } + + auto 
requires_clause = std::string{}; + if (n.requires_clause_expression) { + requires_clause += " requires (" + pretty_print_visualize(*n.requires_clause_expression, indent) + ")"; + } + + auto initializer = std::string{}; + if (n.initializer) { + auto adjusted_indent = indent; + if (!n.name()) { + ++adjusted_indent; + } + initializer = " ="; + if (n.is_function() && n.is_constexpr) { + initializer += "="; + } + initializer += " " + pretty_print_visualize(*n.initializer, adjusted_indent); + if (initializer.ends_with(";;")) { + initializer.pop_back(); + } + } + else if (!n.is_parameter) { + initializer = ";"; + } + + // Then slot them in where appropriate + + auto ret = std::string{""}; + + // Add an extra newline for spacing, unless this declaration + // is within a function body or is the first member of a type + if ( + !n.parent_is_function() + && !n.parent_is_object() + && !n.is_parameter + ) + { + static declaration_node const* last_parent_type = {}; + if (n.parent_is_type()) { + if (last_parent_type != n.get_parent()) { + last_parent_type = n.get_parent(); + } + else { + ret += "\n"; + } + } + else { + ret += "\n"; + } + } + if (!n.is_parameter && n.name()) { + ret += std::string{"\n"} + pre(indent); + } + + switch (n.access) { + break;case accessibility::public_ : ret += "public "; + break;case accessibility::protected_ : ret += "protected "; + break;case accessibility::private_ : ret += "private "; + break;default: ; // default accessibility + } + + if (n.identifier) { + ret += pretty_print_visualize(*n.identifier, indent); + } + + if (n.is_parameter && (n.has_name("this") || n.has_name("that"))) { + return ret; + } + + if (n.is_variadic) { + ret += "..."; + } + + ret += ":"; + + if (n.is_function()) { + auto& func = std::get<declaration_node::a_function>(n.type); + assert(func); + ret += metafunctions + + template_params + + pretty_print_visualize(*func, indent) + + requires_clause + + initializer; + } + else if (n.is_object()) { + auto& type_id = 
std::get<declaration_node::an_object>(n.type); + assert(type_id); + ret += metafunctions + + template_params; + if (!n.has_wildcard_type()) { + ret += " " + pretty_print_visualize(*type_id, indent); + } + ret += requires_clause + + initializer; + } + else if (n.is_type()) { + auto& t = std::get<declaration_node::a_type>(n.type); + assert(t); + ret += metafunctions + + template_params + + " " + pretty_print_visualize(*t) + + initializer; + } + else if (n.is_namespace()) { + auto& t = std::get<declaration_node::a_type>(n.type); + assert(t); + ret += "namespace = " + + initializer; + } + else if (n.is_alias()) { + auto& a = std::get<declaration_node::an_alias>(n.type); + assert(a); + + auto object_type_id = std::string{}; + if (a->type_id) { + object_type_id += " " + pretty_print_visualize(*a->type_id, indent); + } + + ret += template_params; + if (a->is_type_alias()) { + auto& t = std::get<alias_node::a_type>(a->initializer); + ret += " type" + + requires_clause + + " == " + + pretty_print_visualize(*t, indent); + if (ret.back() != ';') { + ret += ";"; + } + } + else if (a->is_namespace_alias()) { + auto& id = std::get<alias_node::a_namespace>(a->initializer); + assert(id); + ret += " namespace == " + + pretty_print_visualize(*id, indent); + if (ret.back() != ';') { + ret += ";"; + } + } + else if (a->is_object_alias()) { + auto& expr = std::get<alias_node::an_object>(a->initializer); + assert(expr); + ret += object_type_id + + requires_clause + + " == " + + pretty_print_visualize(*expr, indent); + if (ret.back() != ';') { + ret += ";"; + } + } + } + + return ret; +} + + +auto pretty_print_visualize(translation_unit_node const& n) + -> std::string +{ + auto ret = std::string{}; + + for (auto& decl : n.declarations) { + assert(decl); + ret += pretty_print_visualize(*decl, 0); + } + + return ret; +} + + +//----------------------------------------------------------------------- +// +// parser: parses a section of Cpp2 code +// 
+//----------------------------------------------------------------------- +// +class parser +{ + std::vector<error_entry>& errors; + + std::unique_ptr<translation_unit_node> parse_tree = {}; + + // Keep a stack of current capture groups (contracts/decls still being parsed) + std::vector<capture_group*> current_capture_groups = {}; + + struct capture_groups_stack_guard + { + parser* pars; + + capture_groups_stack_guard(parser* p, capture_group* cg) + : pars{ p } + { + assert(p); + assert(cg); + pars->current_capture_groups.push_back(cg); + } + + ~capture_groups_stack_guard() + { + pars->current_capture_groups.pop_back(); + } + }; + + // Keep a stack of currently active declarations (still being parsed) + std::vector<declaration_node*> current_declarations = { nullptr }; + + struct current_declarations_stack_guard + { + parser* pars; + + current_declarations_stack_guard(parser* p, declaration_node* decl) + : pars{ p } + { + assert(p); + assert(decl); + pars->current_declarations.push_back(decl); + } + + ~current_declarations_stack_guard() + { + pars->current_declarations.pop_back(); + } + }; + + std::vector<token> const* tokens = {}; + std::deque<token>* generated_tokens = {}; + int pos = 0; + std::string parse_kind = {}; + + // Keep track of the function bodies' locations - used to emit comments + // in the right pass (decide whether it's a comment that belongs with + // the declaration or is part of the definition) + struct function_body_extent { + lineno_t first; + lineno_t last; + auto operator<=>(function_body_extent const&) const = default; + auto operator<=>(int i) const { return first <=> i; } + + function_body_extent( lineno_t f, lineno_t l ): first{f}, last{l} { } + }; + mutable std::vector<function_body_extent> function_body_extents; + mutable bool is_function_body_extents_sorted = false; + +public: + auto is_within_function_body(source_position p) const + { + // Short circuit the empty case, so that the rest of the function + // can unconditionally 
decrement any non-.begin() iterator once + if (function_body_extents.empty()) { + return false; + } + + // Ensure we are sorted + if (!is_function_body_extents_sorted) { + std::sort( + function_body_extents.begin(), + function_body_extents.end() + ); + is_function_body_extents_sorted = true; + } + + // Find the first entry that is beyond pos, and back up one to + // the last that could be a match; this also ensures iter is + // dereferenceable, not .end() + auto iter = std::lower_bound( + function_body_extents.begin(), + function_body_extents.end(), + p.lineno+1 + ); + if (iter != function_body_extents.begin()) { + --iter; + } + + // Now go backwards through the preceding entries until + // one includes pos or we move before pos + while ( + iter->first <= p.lineno + ) + { + if ( + iter->first <= p.lineno + && p.lineno <= iter->last + ) + { + return true; + } + if (iter == function_body_extents.begin()) { + break; + } + --iter; + } + return false; + } + + +public: + //----------------------------------------------------------------------- + // Constructors - the copy constructor constructs a new instance with + // the same errors reference but otherwise a clean slate + // + // errors error list + // + parser( std::vector<error_entry>& errors_ ) + : errors{ errors_ } + , parse_tree{std::make_unique<translation_unit_node>()} + { } + + parser( parser const& that ) + : errors{ that.errors } + , parse_tree{std::make_unique<translation_unit_node>()} + { } + + + //----------------------------------------------------------------------- + // parse + // + // tokens input tokens for this section of Cpp2 source code + // generated_tokens a shared place to store generated tokens + // + // Each call parses this section's worth of tokens and adds the + // result to the stored parse tree. 
Call this repeatedly for the Cpp2 + // sections in a TU to build the whole TU's parse tree + // + auto parse( + std::vector<token> const& tokens_, + std::deque<token>& generated_tokens_ + ) + -> bool + { + parse_kind = "source file"; + + // Set per-parse state for the duration of this call + tokens = &tokens_; + generated_tokens = &generated_tokens_; + + // Generate parse tree for this section as if a standalone TU + pos = 0; + auto tu = translation_unit(); + + // Then add it to the complete parse tree + parse_tree->declarations.insert( + parse_tree->declarations.end(), + std::make_move_iterator(tu->declarations.begin()), + std::make_move_iterator(tu->declarations.end()) + ); + if (!done()) { + error("unexpected text at end of Cpp2 code section", true, {}, true); + return false; + } + return true; + } + + + //----------------------------------------------------------------------- + // parse_one_statement + // + // tokens input tokens for this section of Cpp2 source code + // generated_tokens a shared place to store generated tokens + // + // Each call parses one statement and returns its parse tree. 
+ // + auto parse_one_declaration( + std::vector<token> const& tokens_, + std::deque<token>& generated_tokens_ + ) + -> std::unique_ptr<statement_node> + { + parse_kind = "source string during code generation"; + + // Set per-parse state for the duration of this call + tokens = &tokens_; + generated_tokens = &generated_tokens_; + + try { + // Parse one declaration - we succeed if the parse succeeded, + // and there were no new errors, and all tokens were consumed + auto errors_size = std::ssize(errors); + pos = 0; + if (auto d = statement(); + d + && std::ssize(errors) == errors_size + && done() + ) + { + return d; + } + } + catch(std::runtime_error& e) { + error(e.what(), true, {}, true); + } + + return {}; + } + + + //----------------------------------------------------------------------- + // Get a set of pointers to just the declarations in the given token map section + // + auto get_parse_tree_declarations_in_range(std::vector<token> const& token_range) const + -> std::vector< declaration_node const* > + { + assert (parse_tree); + assert (!token_range.empty()); + auto first_line = token_range.front().position().lineno; + auto last_line = token_range.back().position().lineno; + + auto ret = std::vector< declaration_node const* >{}; + for (auto& decl : parse_tree->declarations) + { + assert(decl); + + // The grammar and the tokens are in lineno order, so we don't + // need to look further once we pass the last lineno + if (decl->position().lineno > last_line) { + break; + } + if (decl->position().lineno >= first_line) { + ret.push_back( decl.get() ); + } + } + + return ret; + } + + + //----------------------------------------------------------------------- + // visit + // + auto visit(auto& v) -> void + { + parse_tree->visit(v, 0); + } + +private: + //----------------------------------------------------------------------- + // Error reporting: Fed into the supplied this->errors object + // + // msg message to be printed + // + // include_curr_token in this file 
(during parsing), we normally want + // to show the current token as the unexpected text + // we encountered, but some sema rules are applied + // early during parsing and for those it doesn't + // make sense to show the next token (e.g., when + // we detect and reject a "std::move" qualified-id, + // it's not relevant to add "at LeftParen: (" + // just because ( happens to be the next token) + // + auto error( + char const* msg, + bool include_curr_token = true, + source_position err_pos = {}, + bool fallback = false + ) const + -> void + { + auto m = std::string{msg}; + auto i = done() ? -1 : 0; + assert (peek(i)); + if (include_curr_token) { + m += std::string(" (at '") + peek(i)->to_string() + "')"; + } + if ( + err_pos == source_position{} + ) { + err_pos = peek(i)->position(); + } + errors.emplace_back( err_pos, m, false, fallback ); + } + + auto error( + std::string const& msg, + bool include_curr_token = true, + source_position err_pos = {}, + bool fallback = false + ) const + -> void + { + error( msg.c_str(), include_curr_token, err_pos, fallback ); + } + + bool has_error() { + return !errors.empty(); + } + + + //----------------------------------------------------------------------- + // Token navigation: Only these functions should access this->token_ + // + auto curr() const + -> token const& + { + if (done()) { + throw std::runtime_error("unexpected end of " + parse_kind); + } + + return (*tokens)[pos]; + } + + auto peek(int num) const + -> token const* + { + assert (tokens); + if ( + pos + num >= 0 + && pos + num < std::ssize(*tokens) + ) + { + return &(*tokens)[pos + num]; + } + return {}; + } + + auto done() const + -> bool + { + assert (tokens); + assert (pos <= std::ssize(*tokens)); + return pos == std::ssize(*tokens); + } + + auto next(int num = 1) + -> void + { + assert (tokens); + pos = std::min( pos+num, _as<int>(std::ssize(*tokens)) ); + } + + + //----------------------------------------------------------------------- + // Parsers for unary 
expressions + // + + //G primary-expression: + //G inspect-expression + //G id-expression + //G literal + //G '(' expression-list ')' + //GT '{' expression-list '}' + //G unnamed-declaration + //G + auto primary_expression() + -> std::unique_ptr<primary_expression_node> + { + auto n = std::make_unique<primary_expression_node>(); + + if (auto inspect = inspect_expression(true)) + { + n->expr = std::move(inspect); + return n; + } + + if (auto id = id_expression()) { + n->expr = std::move(id); + return n; + } + + if (auto lit = literal()) { + n->expr = std::move(lit); + return n; + } + + if (curr().type() == lexeme::LeftParen + // If in the future (not now) we decide to allow braced-expressions + // || curr().type() == lexeme::LeftBrace + ) + { + bool inside_initializer = ( + peek(-1) && peek(-1)->type() == lexeme::Assignment + ); + auto open_paren = &curr(); + auto close = close_paren_type(open_paren->type()); + auto close_text = [&] () -> std::string { if (close == lexeme::RightParen) { return ")"; } return "}"; }(); + next(); + auto expr_list = expression_list(open_paren, inside_initializer); + if (!expr_list) { + error("unexpected text - ( is not followed by an expression-list"); + next(); + return {}; + } + if (curr().type() != close_paren_type(open_paren->type())) { + error("unexpected text - expression-list is not terminated by " + close_text); + next(); + return {}; + } + expr_list->close_paren = &curr(); + next(); + if ( + curr().type() != lexeme::Semicolon + && curr().type() != lexeme::RightParen + && curr().type() != lexeme::RightBracket + && curr().type() != lexeme::Greater + && curr().type() != lexeme::Comma + ) { + expr_list->inside_initializer = false; + } + n->expression_list_is_fold_expression = expr_list->is_fold_expression(); + n->expr = std::move(expr_list); + return n; + } + + if (auto decl = unnamed_declaration(curr().position(), false, true)) // captures are allowed + { + assert ( + !decl->has_name() + && "ICE: declaration should have been 
unnamed" + ); + + if (auto obj = std::get_if<declaration_node::an_object>(&decl->type)) { + if ((*obj)->is_wildcard()) { + error("an unnamed object at expression scope currently cannot have a deduced type (the reason to create an unnamed object is typically to create a temporary of a named type)"); + next(); + return {}; + } + } + else if (auto func = std::get_if<declaration_node::a_function>(&decl->type)) { + if ((*func)->returns.index() == function_type_node::list) { + error("an unnamed function at expression scope currently cannot return multiple values"); + next(); + return {}; + } + if ( // check if a single-expression function is followed by an extra second semicolon + decl->initializer && decl->initializer->is_expression() + && !done() && curr().type() == lexeme::Semicolon + ) { + error("a single-expression function should end with a single semicolon"); + } + if (!(*func)->contracts.empty()) { + error("an unnamed function at expression scope currently cannot have contracts"); + next(); + return {}; + } + } + else { + error("(temporary alpha limitation) an unnamed declaration at expression scope must be a function or an object"); + next(); + return {}; + } + + if ( + peek(-1) && peek(-1)->type() != lexeme::RightBrace // it is not a braced function expression + && curr().type() != lexeme::LeftParen // not imediatelly called + && curr().type() != lexeme::RightParen // not as a last argument to function + && curr().type() != lexeme::Comma // not as first or in-the-middle, function argument + && curr().type() != lexeme::Greater // not as the last argument to template + && curr().type() != lexeme::RightBracket // not as the last index argument + && curr() != "is" // not as the argument to is + && curr() != "as" // not as the argument to as + && curr() != "do" // not as `for`'s `next`. 
+ ) { + // this is a fix for a short function syntax that should have double semicolon used + // (check comment in expression_statement(bool semicolon_required)) + // We simulate double semicolon by moving back to single semicolon. + next(-1); + } + + n->expr = std::move(decl); + return n; + } + + return {}; + } + + + //G postfix-expression: + //G primary-expression + //G postfix-expression postfix-operator [Note: without whitespace before the operator] + //G postfix-expression '[' expression-list? ']' + //G postfix-expression '(' expression-list? ')' + //G postfix-expression '.' id-expression + //G + auto postfix_expression() + -> std::unique_ptr<postfix_expression_node> + { + auto n = std::make_unique<postfix_expression_node>(); + n->expr = primary_expression(); + if (!(n->expr)) { + return {}; + } + + while ( + !done() + && ( + (is_postfix_operator(curr().type()) + // Postfix operators must be lexically adjacent + && curr().position().lineno == peek(-1)->position().lineno + && curr().position().colno == peek(-1)->position().colno + peek(-1)->length() + ) + || curr().type() == lexeme::LeftBracket + || curr().type() == lexeme::LeftParen + || curr().type() == lexeme::Dot + ) + ) + { + // these can't be unary operators if followed by a (, identifier, or literal + if ( + ( + curr().type() == lexeme::Multiply + || curr().type() == lexeme::Ampersand + || curr().type() == lexeme::Tilde + ) + && peek(1) + && ( + peek(1)->type() == lexeme::LeftParen + || peek(1)->type() == lexeme::Identifier + || is_literal(peek(1)->type()) + ) + ) + { + auto op = curr().to_string(); + auto msg = "postfix unary " + op; + if (curr().type() == lexeme::Multiply ) { msg += " (dereference)" ; } + else if (curr().type() == lexeme::Ampersand) { msg += " (address-of)" ; } + else if (curr().type() == lexeme::Tilde ) { msg += " (unary bit-complement)" ; } + msg += " cannot be immediately followed by a (, identifier, or literal - add whitespace before " + + op + " here if you meant binary " + op; + 
if (curr().type() == lexeme::Multiply ) { msg += " (multiplication)" ; } + else if (curr().type() == lexeme::Ampersand) { msg += " (bitwise and)" ; } + else if (curr().type() == lexeme::Tilde ) { msg += " (binary bit-complement)"; } + + error(msg, false); + break; + } + + if (curr().type() == lexeme::Dollar) { + // cap_grp must not already be set, or this is a multi-$ postfix-expression + if (n->cap_grp) { + error("$ (capture) can appear at most once in a single postfix-expression"); + return {}; + } + if (current_capture_groups.empty()) { + error("$ (capture) cannot appear here - it must appear in an anonymous expression function, a postcondition, or an interpolated string literal"); + return {}; + } + n->cap_grp = current_capture_groups.back(); + n->cap_grp->add(n.get()); + } + + // Remember current position, in case we need to backtrack + auto term_pos = pos; + + auto term = postfix_expression_node::term{&curr()}; + next(); + + if (term.op->type() == lexeme::LeftBracket) + { + term.expr_list = expression_list(term.op); + if (!term.expr_list) + { + error("[ is not followed by a valid expression list"); + return {}; + } + if (curr().type() != lexeme::RightBracket) + { + error("unexpected text - [ is not properly matched by ]", true, {}, true); + return {}; + } + term.expr_list->close_paren = &curr(); + term.op_close = &curr(); + next(); + } + else if (term.op->type() == lexeme::LeftParen) + { + // Next should be an expression-list followed by a ')' + // If not, then this wasn't a call expression so backtrack to + // the '(' which will be part of the next grammar production + + term.expr_list = expression_list(term.op); + if ( + term.expr_list + && curr().type() == lexeme::RightParen + ) + { + term.expr_list->close_paren = &curr(); + term.op_close = &curr(); + next(); + } + else + { + pos = term_pos; // backtrack + break; + } + } + else if (term.op->type() == lexeme::Dot) + { + term.id_expr = id_expression(); + if (!term.id_expr) { + error("'.' 
must be followed by a valid member name"); + return {}; + } + } + + n->ops.push_back( std::move(term) ); + } + + if (auto tok = n->expr->get_token(); + tok + && *tok == "this" + && curr().type() == lexeme::Arrow + ) + { + auto next_word = std::string{}; + if (peek(1)) { + next_word = peek(1)->to_string(); + } + error("'this' is not a pointer - write 'this." + next_word + "' instead of 'this->" + next_word + "'"); + return {}; + } + + for (auto& e : expression_node::current_expressions) { + e->num_subexpressions += std::ssize(n->ops); + } + + return n; + } + + + //G prefix-expression: + //G postfix-expression + //G prefix-operator prefix-expression + //GTODO await-expression + //GTODO 'sizeof' '(' type-id ')' + //GTODO 'sizeof' '...' ( identifier ')' + //GTODO 'alignof' '(' type-id ')' + //GTODO throws-expression + //G + auto prefix_expression() + -> std::unique_ptr<prefix_expression_node> + { + auto n = std::make_unique<prefix_expression_node>(); + for ( ; + is_prefix_operator(curr()); + next() + ) + { + n->ops.push_back(&curr()); + } + if ((n->expr = postfix_expression())) { + return n; + } + switch (curr().type()) + { + break; case lexeme::PlusPlus: + error("prefix '++var' is not valid Cpp2; use postfix 'var++' instead", false); + break; case lexeme::MinusMinus: + error("prefix '--var' is not valid Cpp2; use postfix 'var--' instead", false); + break; case lexeme::Multiply: + error("prefix '*ptr' dereference is not valid Cpp2; use postfix 'ptr*' instead", false); + break; case lexeme::Ampersand: + error("prefix '&var' address-of is not valid Cpp2; use postfix 'var&' instead", false); + break; case lexeme::Tilde: + error("prefix '~var' is not valid Cpp2; use postfix 'var~' instead", false); + break; default: ; + } + return {}; + } + + + //----------------------------------------------------------------------- + // Parsers for binary expressions + // + + // The general /*binary*/-expression: + // /*term*/-expression { { /* operators at this precedence level */ } 
/*term*/-expression }* + // + template< + typename Binary, + typename ValidateOp, + typename TermFunc + > + auto binary_expression( + ValidateOp validate_op, + TermFunc term + ) + -> std::unique_ptr<Binary> + { + auto n = std::make_unique<Binary>(); + if ( (n->expr = term()) ) + { + while (!done()) + { + typename Binary::term t{}; + + // Remember current position, because we may need to backtrack if this next + // t.op might be valid but isn't followed by a valid term and so isn't for us + auto term_pos = pos; + + // Most of these predicates only look at the current token and return + // true/false == whether this is a valid operator for this production + if constexpr( requires{ bool{ validate_op(curr()) }; } ) { + if (!validate_op(curr())) { + break; + } + t.op = &curr(); + next(); + } + + // But for shift-expression we may synthesize >> from > > + // which will return a token* == a valid operator for this production + // (possibly a synthesized new token) or nullptr otherwise + else if constexpr( requires{ validate_op(curr(), *peek(1)); } ) { + if ( + peek(1) == nullptr + || (t.op = validate_op(curr(), *peek(1))) == nullptr + ) + { + break; + } + // If we didn't consume the next token, we consumed the next two + if (t.op != &curr()) { + next(); + } + next(); + } + + // And it shouldn't be anything else + else { + assert (!"ICE: validate_op should take one token and return bool, or two tokens and return token const* "); + } + + // At this point we may have a valid t.op, so try to parse the next term... 
+ // If it's not a valid term, then this t.op wasn't for us, pop it and return + // what we found (e.g., with "requires expression = {...}" the = is a grammar + // element and not an operator, it isn't and can't be part of the expression) + if ( !(t.expr = term()) ) { + pos = term_pos; // backtrack + return n; + } + + // We got a term, so this op + term was for us + n->terms.push_back( std::move(t) ); + } + return n; + } + return {}; + } + + //G multiplicative-expression: + //G is-as-expression + //G multiplicative-expression '*' is-as-expression + //G multiplicative-expression '/' is-as-expression + //G multiplicative-expression '%' is-as-expression + //G + auto multiplicative_expression() + -> auto + { + return binary_expression<multiplicative_expression_node> ( + [](token const& t){ return t.type() == lexeme::Multiply || t.type() == lexeme::Slash || t.type() == lexeme::Modulo; }, + [this]{ return is_as_expression(); } + ); + } + + //G additive-expression: + //G multiplicative-expression + //G additive-expression '+' multiplicative-expression + //G additive-expression '-' multiplicative-expression + //G + auto additive_expression() + -> auto + { + return binary_expression<additive_expression_node> ( + [](token const& t){ return t.type() == lexeme::Plus || t.type() == lexeme::Minus; }, + [this]{ return multiplicative_expression(); } + ); + } + + //G shift-expression: + //G additive-expression + //G shift-expression '<<' additive-expression + //G shift-expression '>>' additive-expression + //G + auto shift_expression(bool allow_angle_operators = true) + -> auto + { + if (allow_angle_operators) { + return binary_expression<shift_expression_node> ( + [this](token const& t, token const& next) -> token const* { + if (t.type() == lexeme::LeftShift) { + return &t; + } + if ( + t.type() == lexeme::Greater + && next.type() == lexeme::Greater + && t.position() == source_position{ next.position().lineno, next.position().colno-1 } + ) + { + generated_tokens->emplace_back( 
">>", t.position(), lexeme::RightShift); + return &generated_tokens->back(); + } + return nullptr; + }, + [this]{ return additive_expression(); } + ); + } + else { + return binary_expression<shift_expression_node> ( + [](token const&, token const&) -> token const* { return nullptr; }, + [this]{ return additive_expression(); } + ); + } + } + + //G compare-expression: + //G shift-expression + //G compare-expression '<=>' shift-expression + //G + auto compare_expression(bool allow_angle_operators = true) + -> auto + { + return binary_expression<compare_expression_node> ( + [](token const& t){ return t.type() == lexeme::Spaceship; }, + [=,this]{ return shift_expression(allow_angle_operators); } + ); + } + + //G relational-expression: + //G compare-expression + //G relational-expression '<' compare-expression + //G relational-expression '>' compare-expression + //G relational-expression '<=' compare-expression + //G relational-expression '>=' compare-expression + //G + auto relational_expression(bool allow_angle_operators = true) + -> auto + { + if (allow_angle_operators) { + return binary_expression<relational_expression_node> ( + [](token const& t, token const& next) -> token const* { + if ( + t.type() == lexeme::Less + || t.type() == lexeme::LessEq + || (t.type() == lexeme::Greater && next.type() != lexeme::GreaterEq) + || t.type() == lexeme::GreaterEq + ) { + return &t; + } + return nullptr; + }, + [=,this]{ return compare_expression(allow_angle_operators); } + ); + } + else { + return binary_expression<relational_expression_node> ( + [](token const&, token const&) -> token const* { return nullptr; }, + [=,this]{ return compare_expression(allow_angle_operators); } + ); + } + } + + //G equality-expression: + //G relational-expression + //G equality-expression '==' relational-expression + //G equality-expression '!=' relational-expression + //G + auto equality_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + if (allow_equality) 
{ + return binary_expression<equality_expression_node> ( + [](token const& t){ return t.type() == lexeme::EqualComparison || t.type() == lexeme::NotEqualComparison; }, + [=,this]{ return relational_expression(allow_angle_operators); } + ); + } + else { + return binary_expression<equality_expression_node> ( + [](token const& t){ return t.type() == lexeme::NotEqualComparison; }, + [=,this]{ return relational_expression(allow_angle_operators); } + ); + } + } + + //G bit-and-expression: + //G equality-expression + //G bit-and-expression '&' equality-expression + //G + auto bit_and_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + return binary_expression<bit_and_expression_node> ( + [](token const& t){ return t.type() == lexeme::Ampersand; }, + [=,this]{ return equality_expression(allow_angle_operators, allow_equality); } + ); + } + + //G bit-xor-expression: + //G bit-and-expression + //G bit-xor-expression '^' bit-and-expression + //G + auto bit_xor_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + return binary_expression<bit_xor_expression_node> ( + [](token const& t){ return t.type() == lexeme::Caret; }, + [=,this]{ return bit_and_expression(allow_angle_operators, allow_equality); } + ); + } + + //G bit-or-expression: + //G bit-xor-expression + //G bit-or-expression '|' bit-xor-expression + //G + auto bit_or_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + return binary_expression<bit_or_expression_node> ( + [](token const& t){ return t.type() == lexeme::Pipe; }, + [=,this]{ return bit_xor_expression(allow_angle_operators, allow_equality); } + ); + } + + //G logical-and-expression: + //G bit-or-expression + //G logical-and-expression '&&' bit-or-expression + //G + auto logical_and_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + return binary_expression<logical_and_expression_node> ( + [](token const& t){ 
return t.type() == lexeme::LogicalAnd; }, + [=,this]{ return bit_or_expression(allow_angle_operators, allow_equality); } + ); + } + + // constant-expression: // don't need intermediate production, just use: + // conditional-expression: // don't need intermediate production, just use: + //G logical-or-expression: + //G logical-and-expression + //G logical-or-expression '||' logical-and-expression + //G + auto logical_or_expression(bool allow_angle_operators = true, bool allow_equality = true) + -> auto + { + return binary_expression<logical_or_expression_node> ( + [](token const& t){ return t.type() == lexeme::LogicalOr; }, + [=,this]{ return logical_and_expression(allow_angle_operators, allow_equality); } + ); + } + + //G assignment-expression: + //G logical-or-expression + //G assignment-expression assignment-operator logical-or-expression + //G + auto assignment_expression( + bool allow_angle_operators = true + ) + -> std::unique_ptr<assignment_expression_node> + { + auto ret = std::unique_ptr<assignment_expression_node>{}; + + if (allow_angle_operators) + { + ret = binary_expression<assignment_expression_node> ( + [this](token const& t, token const& next) -> token const* { + if (is_assignment_operator(t.type())) { + return &t; + } + if ( + t.type() == lexeme::Greater + && next.type() == lexeme::GreaterEq + && t.position() == source_position{ next.position().lineno, next.position().colno-1 } + ) + { + generated_tokens->emplace_back( ">>=", t.position(), lexeme::RightShiftEq); + return &generated_tokens->back(); + } + return nullptr; + }, + [=,this]{ + return logical_or_expression(allow_angle_operators); + } + ); + } + else + { + ret = binary_expression<assignment_expression_node> ( + [](token const&, token const&) -> token const* { return nullptr; }, + [=,this]{ + return logical_or_expression(allow_angle_operators); + } + ); + } + + if (ret && ret->terms_size() > 1) { + error("assignment cannot be chained - instead of 'c = b = a;', write 'b = a; c = b;'", false); 
+ return {}; + } + + return ret; + } + + //G expression: // eliminated 'condition:' - just use 'expression:' + //G assignment-expression + //GTODO try expression + //G + auto expression( + bool allow_angle_operators = true, + bool check_arrow = true + ) + -> std::unique_ptr<expression_node> + { + auto n = std::make_unique<expression_node>(); + + { + expression_node::current_expressions.push_back(n.get()); + auto guard = finally([&]{ expression_node::current_expressions.pop_back(); }); + + if (!(n->expr = assignment_expression(allow_angle_operators))) { + return {}; + } + + if ( + check_arrow + && !done() + && curr().type() == lexeme::Arrow + ) + { + error("'->' is not Cpp2 deference syntax - write '*.' instead"); + return {}; + } + } + + for (auto& e : expression_node::current_expressions) { + ++e->num_subexpressions; + } + return n; + } + + //G expression-list: + //G parameter-direction? expression + //G expression-list ',' parameter-direction? expression + //G + auto expression_list( + token const* open_paren, + bool inside_initializer = false + ) + -> std::unique_ptr<expression_list_node> + { + auto pass = passing_style::in; + auto n = std::make_unique<expression_list_node>(); + n->open_paren = open_paren; + n->inside_initializer = inside_initializer; + + if (auto dir = to_passing_style(curr()); + ( + dir == passing_style::out + || dir == passing_style::move + || dir == passing_style::forward + ) + && peek(1) + && peek(1)->type() == lexeme::Identifier + ) + { + pass = dir; + next(); + } + auto x = expression(); + + // If this is an empty expression_list, we're done + if (!x) { + return n; + } + + // Otherwise remember the first expression + n->expressions.push_back( { pass, std::move(x) } ); + // and see if there are more... 
+ while (curr().type() == lexeme::Comma) { + next(); + pass = passing_style::in; + if (auto dir = to_passing_style(curr()); + dir == passing_style::out + || dir == passing_style::move + || dir == passing_style::forward + ) + { + pass = dir; + next(); + } + auto expr = expression(); + if (!expr) { + error("invalid text in expression list", true, {}, true); + return {}; + } + n->expressions.push_back( { pass, std::move(expr) } ); + } + return n; + } + + + //G type-id: + //G type-qualifier-seq? qualified-id + //G type-qualifier-seq? unqualified-id + //G + //G type-qualifier-seq: + //G type-qualifier + //G type-qualifier-seq type-qualifier + //G + //G type-qualifier: + //G 'const' + //G '*' + //G + auto type_id() + -> std::unique_ptr<type_id_node> + { + auto n = std::make_unique<type_id_node>(); + + while ( + (curr().type() == lexeme::Keyword && curr() == "const") + || curr().type() == lexeme::Multiply + ) + { + if ( + curr() == "const" + && !n->pc_qualifiers.empty() + && *n->pc_qualifiers.back() == "const" + ) + { + error("consecutive 'const' not allowed"); + return {}; + } + n->pc_qualifiers.push_back( &curr() ); + next(); + } + + if (auto id = qualified_id()) { + n->pos = id->position(); + n->id = std::move(id); + assert (n->id.index() == type_id_node::qualified); + } + else if (auto id = unqualified_id()) { + n->pos = id->position(); + n->id = std::move(id); + assert (n->id.index() == type_id_node::unqualified); + } + else { + if (!n->pc_qualifiers.empty()) { + error("'*'/'const' type qualifiers must be followed by a type name or '_' wildcard"); + } + return {}; + } + if (curr().type() == lexeme::Multiply) { + error("'T*' is not a valid Cpp2 type; use '*T' for a pointer instead", false); + return {}; + } + + return n; + } + + + //G is-as-expression: + //G prefix-expression + //G is-as-expression is-type-constraint + //G is-as-expression is-value-constraint + //G is-as-expression as-type-cast + //GTODO type-id is-type-constraint + //G + //G is-type-constraint + //G 
'is' type-id + //G + //G is-value-constraint + //G 'is' expression + //G + //G as-type-cast + //G 'as' type-id + //G + auto is_as_expression() + -> std::unique_ptr<is_as_expression_node> + { + auto n = std::make_unique<is_as_expression_node>(); + n->expr = prefix_expression(); + if (!(n->expr)) { + return {}; + } + + auto is_found = false; + auto as_found = false; + + while ( + !done() + && (curr() == "is" || curr() == "as") + ) + { + if (curr() == "is") { + if (is_found) { + error("repeated 'is' are not allowed"); + return {}; + } + is_found = true; + } + else { + as_found = true; + } + + if (is_found && as_found) { + error("mixed 'is' and 'as' are not allowed"); + return {}; + } + + auto term = is_as_expression_node::term{}; + term.op = &curr(); + next(); + + if ((term.type = type_id()) != nullptr) { + ; + } + else if ((term.expr = expression()) != nullptr) { + ; + } + + if ( + *term.op == "as" + && term.expr + ) + { + error("'as' must be followed by a type-id, not an expression", false); + return {}; + } + if ( + !term.type + && !term.expr + ) + { + if (*term.op == "is") { + error( "'is' must be followed by a type-id or an expression", false); + } + else { + error( "'as' must be followed by a type-id", false); + } + return {}; + } + + n->ops.push_back( std::move(term) ); + } + + return n; + } + + + //G unqualified-id: + //G identifier + //G keyword + //G template-id + //GTODO operator-function-id + //G ... + //G + //G template-id: + //G identifier '<' template-argument-list? 
'>' + //G + //G template-argument-list: + //G template-argument-list ',' template-argument + //G + //G template-argument: + //G # note: < > << >> are not allowed in expressions until new ( is opened + //G 'const' type-id + //G expression + //G type-id + //G + auto unqualified_id() + -> std::unique_ptr<unqualified_id_node> + { + // Handle the identifier + // (identifiers, keywords, Cpp2 fixed types and '...' are all accepted here; + // anything else returns null without reporting an error) + if ( + curr().type() != lexeme::Identifier + && curr().type() != lexeme::Keyword + && curr().type() != lexeme::Cpp2FixedType + && curr().type() != lexeme::Ellipsis + ) + { + return {}; + } + + auto n = std::make_unique<unqualified_id_node>(); + + n->identifier = &curr(); + auto one_past_identifier_end_pos = curr().position(); + one_past_identifier_end_pos.colno += curr().length(); + next(); + + // Handle the template-argument-list if there is one + // (the < must start exactly where the identifier ends, i.e. with no whitespace between them) + if ( + curr().type() == lexeme::Less + && curr().position() == one_past_identifier_end_pos + ) + { + // Remember current position, in case this < isn't a template argument list + auto start_pos = pos; + + n->open_angle = curr().position(); + next(); + + auto term = template_argument{}; + + do { + // If it doesn't start with * or const (which can only be a type id), + // try parsing it as an expression + if (auto e = [&]{ + if ( + curr().type() == lexeme::Multiply // '*' + || curr() == "const" // 'const' + ) + { + return decltype(expression()){}; + } + return expression(false); // false == disallow unparenthesized relational comparisons in template args + }() + ) + { + term.arg = std::move(e); + } + + // Else try parsing it as a type id + else if (auto i = type_id()) { + term.arg = std::move(i); + } + + // Else if we already got at least one template-argument, this is a + // ',' followed by something that isn't a valid template-arg + else if (std::ssize(n->template_args) > 0) { + error( "expected a template argument after ','", false); + return {}; + } + + // Else this is an empty '<>' list which is okay + else { + break; + } + + n->template_args.push_back( 
std::move(term) ); + } + // Use the lambda trick to jam in a "next" clause + while ( + curr().type() == lexeme::Comma + && [&]{term.comma = curr().position(); next(); return true;}() + ); + // When this is rewritten in Cpp2, it will be: + // while curr().type() == lexeme::Comma + // next term.comma = curr().position(); + + if (curr().type() != lexeme::Greater) { + // Aha, this wasn't a template argument list after all, + // so back out just that part and return the identifier + n->open_angle = source_position{}; + n->template_args.clear(); + pos = start_pos; + return n; + } + n->close_angle = curr().position(); + next(); + } + + else { + if (*n->identifier == "new") { + error( "use 'new<" + curr().to_string() + ">', not 'new " + curr().to_string() + "'", false); + return {}; + } + if (*n->identifier == "co_await" || *n->identifier == "co_yield") { + error( "(temporary alpha limitation) coroutines are not yet supported in Cpp2", false); + return {}; + } + } + + return n; + } + + + //G qualified-id: + //G nested-name-specifier unqualified-id + //G member-name-specifier unqualified-id + //G + //G nested-name-specifier: + //G '::' + //G unqualified-id '::' + //G + //G member-name-specifier: + //G unqualified-id '.' 
+ //G + auto qualified_id() + -> std::unique_ptr<qualified_id_node> + { + auto n = std::make_unique<qualified_id_node>(); + + auto term = qualified_id_node::term{nullptr}; + + // Handle initial :: if present, else the first scope_op will be null + if (curr().type() == lexeme::Scope) { + term.scope_op = &curr(); + next(); + } + + // Remember current position, because we need to look ahead to the next :: + auto start_pos = pos; + + // If we don't get a first id, or if we didn't have a leading :: and + // the next thing isn't :: or ., back out and report unsuccessful + term.id = unqualified_id(); + if ( + !term.id + || (!term.scope_op && curr().type() != lexeme::Scope) + ) + { + pos = start_pos; // backtrack + return {}; + } + + // Reject "std" :: "move" / "forward" + assert (term.id->identifier); + auto first_uid_was_std = (*term.id->identifier == "std"); + auto first_time_through_loop = true; + + n->ids.push_back( std::move(term) ); + + while (curr().type() == lexeme::Scope) + { + auto term = qualified_id_node::term{ &curr() }; + next(); + term.id = unqualified_id(); + if (!term.id) { + error("invalid text in qualified name", true, {}, true); + return {}; + } + assert (term.id->identifier); + if ( + first_time_through_loop + && first_uid_was_std + && term.scope_op->type() == lexeme::Scope + ) + { + if (*term.id->identifier == "move") { + error("std::move is not needed in Cpp2 - use 'move' parameters/arguments instead", false); + return {}; + } + else if (*term.id->identifier == "forward") { + error("std::forward is not needed in Cpp2 - use 'forward' parameters/arguments instead", false); + return {}; + } + first_time_through_loop = false; + } + n->ids.push_back( std::move(term) ); + } + + return n; + } + + + //G id-expression: + //G qualified-id + //G unqualified-id + //G + auto id_expression() + -> std::unique_ptr<id_expression_node> + { + auto n = std::make_unique<id_expression_node>(); + if (auto id = qualified_id()) { + n->pos = id->position(); + n->id = 
std::move(id); + assert (n->id.index() == id_expression_node::qualified); + return n; + } + if (auto id = unqualified_id()) { + n->pos = id->position(); + n->id = std::move(id); + assert (n->id.index() == id_expression_node::unqualified); + return n; + } + return {}; + } + + //G literal: + //G integer-literal ud-suffix? + //G character-literal ud-suffix? + //G floating-point-literal ud-suffix? + //G string-literal ud-suffix? + //G boolean-literal ud-suffix? + //G pointer-literal ud-suffix? + //G user-defined-literal ud-suffix? + //G + auto literal() + -> std::unique_ptr<literal_node> + { + if (is_literal(curr().type())) { + auto n = std::make_unique<literal_node>(); + n->literal = &curr(); + next(); + if (curr().type() == lexeme::UserDefinedLiteralSuffix) { + n->user_defined_suffix = &curr(); + next(); + } + return n; + } + return {}; + } + + //G expression-statement: + //G expression ';' + //G expression + //G + auto expression_statement( + bool semicolon_required + ) + -> std::unique_ptr<expression_statement_node> + { + auto n = std::make_unique<expression_statement_node>(); + + expression_statement_node::current_expression_statements.push_back(n.get()); + auto guard = finally([&]{ expression_statement_node::current_expression_statements.pop_back(); }); + + if (!(n->expr = expression(true, true))) { + return {}; + } + + if ( + semicolon_required + && (done() || curr().type() != lexeme::Semicolon) + && peek(-1)->type() != lexeme::Semicolon + // this last peek(-1)-condition is a hack (? or is it just + // maybe elegant? I'm torn) so that code like + // + // callback := :(inout x:_) = x += "suffix"; ; + // + // doesn't need the redundant semicolon at the end of a decl... 
+ // there's probably a cleaner way to do it, but this works and + // it doesn't destabilize any regression tests + ) + { + return {}; + } + if ( + !done() + && curr().type() == lexeme::Semicolon + ) + { + n->has_semicolon = true; + next(); + } + return n; + } + + + //G selection-statement: + //G 'if' 'constexpr'? logical-or-expression compound-statement + //G 'if' 'constexpr'? logical-or-expression compound-statement 'else' compound-statement + //G + auto selection_statement() + -> std::unique_ptr<selection_statement_node> + { + if ( + curr().type() != lexeme::Keyword + || curr() != "if" + ) + { + return {}; + } + auto n = std::make_unique<selection_statement_node>(); + n->identifier = &curr(); + next(); + + if ( + curr().type() == lexeme::Keyword + && curr() == "constexpr" + ) + { + n->is_constexpr = true; + next(); + } + + if (auto e = logical_or_expression()) { + n->expression = std::move(e); + } + else { + error("invalid if condition", true, {}, true); + return {}; + } + + if (curr().type() != lexeme::LeftBrace) { + error("an if branch body must be enclosed with { }"); + return {}; + } + + if (auto s = compound_statement()) { + n->true_branch = std::move(s); + } + else { + error("invalid if branch body", true, {}, true); + return {}; + } + + if ( + curr().type() != lexeme::Keyword + || curr() != "else" + ) + { + // Add empty else branch to simplify processing elsewhere + // Note: Position (0,0) signifies it's implicit (no source location) + n->false_branch = + std::make_unique<compound_statement_node>( source_position(0,0) ); + } + else { + n->else_pos = curr().position(); + next(); + + if ( + curr().type() != lexeme::LeftBrace + && curr() != "if" + ) + { + error("an else branch body must be enclosed with { }"); + return {}; + } + + if (auto s = compound_statement( source_position{}, true )) { + n->false_branch = std::move(s); + n->has_source_false_branch = true; + } + else { + error("invalid else branch body", true, {}, true); + return {}; + } + } + + return 
n; + } + + + //G return-statement: + //G return expression? ';' + //G + auto return_statement() + -> std::unique_ptr<return_statement_node> + { + if ( + curr().type() != lexeme::Keyword + || curr() != "return" + ) + { + return {}; + } + + auto n = std::make_unique<return_statement_node>(); + n->identifier = &curr(); + next(); + + // If there's no optional return expression, we're done + if (curr().type() == lexeme::Semicolon) { + next(); + return n; + } + + // Handle the return expression + auto x = expression(); + if (!x) { + error("invalid return expression", true, {}, true); + return {}; + } + n->expression = std::move(x); + + // Final semicolon + if (curr().type() != lexeme::Semicolon) { + error("missing ; after return"); + next(); + return {}; + } + + next(); + return n; + } + + + //G iteration-statement: + //G label? 'while' logical-or-expression next-clause? compound-statement + //G label? 'do' compound-statement next-clause? 'while' logical-or-expression ';' + //G label? 'for' expression next-clause? 
//G         'do' unnamed-declaration
//G
//G label:
//G     identifier ':'
//G
//G next-clause:
//G     'next' assignment-expression
//G
//  Parses a (possibly labeled) 'while', 'do', or 'for' loop that starts at
//  the current token.
//
//  Returns the populated node on success. Returns null without reporting an
//  error when the current token (after an optional 'identifier :' label) is
//  not one of the three loop keywords. Returns null after calling error()
//  when the loop is ill-formed; this function does not restore the token
//  position in that case.
//
auto iteration_statement()
    -> std::unique_ptr<iteration_statement_node>
{
    auto n = std::make_unique<iteration_statement_node>();

    //  If the next three tokens are:
    //      identifier ':' 'for/while/do'
    //  then it's a labeled iteration statement
    if (
        curr().type() == lexeme::Identifier
        && peek(1)
        && peek(1)->type() == lexeme::Colon
        && peek(2)
        && peek(2)->type() == lexeme::Keyword
        && (*peek(2) == "while" || *peek(2) == "do" || *peek(2) == "for")
        )
    {
        n->label = &curr();
        next();     // eat the label identifier
        next();     // eat the ':'
    }

    if (
        curr().type() != lexeme::Keyword
        || (curr() != "while" && curr() != "do" && curr() != "for")
        )
    {
        return {};
    }

    n->identifier = &curr();
    next();

    //-----------------------------------------------------------------
    //  We'll do these same things in different orders,
    //  so extract them into local functions...
    //  Each helper returns false only after it has reported an error.
    auto handle_optional_next_clause = [&]() -> bool {
        if (curr() != "next") {
            return true; // absent next clause is okay
        }
        next(); // don't bother remembering "next" token, shouldn't need its position info
        //  Note: this local intentionally-or-not shadows the next() member
        //  for the rest of the lambda; next() is not called again below
        auto next = assignment_expression();
        if (!next) {
            error("invalid expression after 'next'", true, {}, true);
            return false;
        }
        n->next_expression = std::move(next);
        return true;
    };

    auto handle_logical_expression = [&]() -> bool {
        auto x = logical_or_expression();
        if (!x) {
            error("a loop must have a valid conditional expression");
            return false;
        }
        n->condition = std::move(x);
        return true;
    };

    //  Used for both 'while' and 'do' bodies, so the diagnostic below is
    //  also what a malformed 'do' body reports
    auto handle_compound_statement = [&]() -> bool {
        auto s = compound_statement();
        if (!s) {
            error("invalid while loop body", true, {}, true);
            return false;
        }
        n->statements = std::move(s);
        return true;
    };
    //-----------------------------------------------------------------

    //  Handle "while"
    //      condition, optional next-clause, braced body
    //
    if (*n->identifier == "while")
    {
        if (!handle_logical_expression ()) { return {}; }
        if (!handle_optional_next_clause()) { return {}; }
        if (!handle_compound_statement ()) { return {}; }
        if (!done() && curr().type() == lexeme::Semicolon) {
            error("a loop body may not be followed by a semicolon (empty statements are not allowed)");
            return {};
        }
        return n;
    }

    //  Handle "do"
    //      braced body, optional next-clause, 'while' condition ';'
    //
    else if (*n->identifier == "do")
    {
        if (!handle_compound_statement ()) { return {}; }
        if (!handle_optional_next_clause()) { return {}; }
        if (curr() != "while") {
            error("do loop body must be followed by 'while'");
            return {};
        }
        next();
        if (!handle_logical_expression ()) { return {}; }
        if (curr().type() != lexeme::Semicolon) {
            error("missing ; after do..while loop condition");
            next();
            return {};
        }
        next();
        return n;
    }

    //  Handle "for"
    //      range expression, optional next-clause, 'do' '(' parameter ')' body
    //
    else if (*n->identifier == "for")
    {
        n->range = expression();
        if (!n->range) {
            error("expected valid range expression after 'for'", true, {}, true);
            return {};
        }

        if (!handle_optional_next_clause()) { return {}; }

        if (
            curr() != "do"
            || !peek(1)
            || peek(1)->type() != lexeme::LeftParen
            )
        {
            //  Look one token ahead only to choose the more helpful diagnostic
            next();
            if (curr().type() == lexeme::Colon) {
                error("alpha design change note: 'for range' syntax has changed - please remove ':' and '=', for example: for args do (arg) std::cout << arg;");
            }
            else {
                error("'for range' must be followed by 'do ( parameter )'");
            }
            return {};
        }
        next(2); // eat 'do' and '('

        n->parameter = parameter_declaration(false, false, false);
        if (!n->parameter) {
            error("'for range do (' must be followed by a parameter declaration", false, source_position{}, true);
            return {};
        }

        if (curr().type() != lexeme::RightParen) {
            error("expected ')' after 'for' parameter");
            return {};
        }
        next(); // eat ')'

        n->body = statement();
        if (!n->body) {
            error("invalid for..do loop body", false, source_position{}, true);
            return {};
        }
        // else
        //  Record whether the loop parameter uses the 'in' passing style
        if (n->parameter->pass == passing_style::in) {
            n->for_with_in = true;
        }

        if (!done() && curr().type() == lexeme::Semicolon) {
            error("a loop body may not be followed by a semicolon (empty statements are not allowed)");
            return {};
        }

        return n;
    }

    //  The keyword was already checked to be one of while/do/for above
    assert(!"compiler bug: unexpected case");
    return {};
}


//G alternative:
//G     alt-name? is-type-constraint '=' statement
//G     alt-name? is-value-constraint '=' statement
//G     alt-name? as-type-cast '=' statement
//G
//GTODO alt-name:
//G     unqualified-id ':'
//G
//  Parses one alternative of an inspect body: 'is' or 'as', followed by a
//  type-id or (failing that) a postfix-expression, then '=' and a statement.
//
//  Returns null without an error if the current token is neither "is" nor
//  "as"; returns null after calling error() for every other failure,
//  including the not-yet-supported 'identifier :' alt-name prefix.
//
auto alternative()
    -> std::unique_ptr<alternative_node>
{
    auto n = std::make_unique<alternative_node>();

    if (
        curr().type() == lexeme::Identifier
        && peek(1)
        && peek(1)->type() == lexeme::Colon
        )
    {
        error("(temporary alpha limitation) declaring an identifier is not supported yet");
        return {};
    }

    //  Now we should be at "is" or "as"
    //  (initial partial implementation, just "is/as id-expression")
    if (
        curr() != "is"
        && curr() != "as"
        )
    {
        return {};
    }

    n->is_as_keyword = &curr();
    next();

    //  A type-id is tried first; a value is only attempted if that fails
    if (auto id = type_id()) {
        n->type_id = std::move(id);
    }
    else if (auto e = postfix_expression()) {
        n->value = std::move(e);
    }
    else {
        error("expected type-id or value after 'is' in inspect alternative", true, {}, true);
        return {};
    }

    if (curr().type() != lexeme::Assignment) {
        error("expected = at start of inspect alternative body", true, {}, true);
        return {};
    }
    n->equal_sign = curr().position();
    next();

    //  The '=' position is handed to statement() along with the body
    if (auto s = statement(true, n->equal_sign)) {
        n->statement = std::move(s);
    }
    else {
        error("expected statement after = in inspect alternative", true, {}, true);
        return {};
    }

    return n;
}


//G inspect-expression:
//G     'inspect' 'constexpr'? expression '{' alternative-seq? '}'
//G     'inspect' 'constexpr'? expression '->' type-id '{' alternative-seq?
'}' + //G + //G alternative-seq: + //G alternative + //G alternative-seq alternative + //G + auto inspect_expression(bool is_expression) + -> std::unique_ptr<inspect_expression_node> + { + if (curr() != "inspect") { + return {}; + } + + if (!is_expression) { + errors.emplace_back( + curr().position(), + "(temporary alpha limitation) cppfront is still learning 'inspect' - only inspect expressions are currently supported" + ); + return {}; + } + + auto n = std::make_unique<inspect_expression_node>(); + n->identifier = &curr(); + next(); + + if (curr() == "constexpr") { + n->is_constexpr = true; + next(); + } + + if (auto e = expression(true, false)) { + n->expression = std::move(e); + } + else { + error("invalid inspect expression", true, {}, true); + return {}; + } + + // Handle the optional explicit return type + if (curr().type() == lexeme::Arrow) + { + if (!is_expression) { + error("an inspect statement cannot have an explicit return type (whereas an inspect expression must have one)"); + return {}; + } + next(); + if (curr().type() == lexeme::LeftParen) { + error("multiple/named returns are not currently allowed for inspect"); + return {}; + } + + auto type = type_id(); + if (!type) { + error("expected a valid inspect return type after ->"); + return {}; + } + n->result_type = std::move(type); + } + else if (is_expression) { + error("an inspect expression must have an explicit '-> result_type'"); + return {}; + } + + // Now do the inspect body + if (curr().type() != lexeme::LeftBrace) { + error("expected { at start of inspect body"); + return {}; + } + n->open_brace = curr().position(); + next(); + + while (curr().type() != lexeme::RightBrace) + { + auto a = alternative(); + if (!a) { + error("invalid alternative in inspect", true, {}, true); + return {}; + } + if ( + is_expression + && !a->statement->is_expression() + ) + { + error("an inspect expression alternative must be just an expression " + "(not a braced block) that will be used as the value of the 
inspect expression"); + return {}; + } + n->alternatives.push_back( std::move(a) ); + } + + n->close_brace = curr().position(); + next(); + + if (n->alternatives.empty()) { + error("inspect body cannot be empty - add at least one alternative"); + return {}; + } + + return n; + } + + + //G jump-statement: + //G 'break' identifier? ';' + //G 'continue' identifier? ';' + //G + auto jump_statement() + -> std::unique_ptr<jump_statement_node> + { + auto n = std::make_unique<jump_statement_node>(); + + if ( + curr() != "break" + && curr() != "continue" + ) + { + return {}; + } + + n->keyword = &curr(); + next(); + + if (curr().type() == lexeme::Identifier) { + n->label = &curr(); + next(); + } + + if (curr().type() != lexeme::Semicolon) { + error("expected ; at end of jump-statement"); + return {}; + } + next(); + + return n; + } + + + //G using-statement: + //G 'using' id-expression ';' + //G 'using' 'namespace' id-expression ';' + //G + auto using_statement() + -> std::unique_ptr<using_statement_node> + { + auto n = std::make_unique<using_statement_node>(); + + if (curr() != "using") { + return {}; + } + n->keyword = &curr(); + next(); + + if (curr() == "namespace") { + n->for_namespace = true; + next(); + } + + auto id = id_expression(); + if (!id) { + error(std::string{"expected valid id-expression after 'using"} + (n->for_namespace ? 
" namespace" : "") + "'"); + return {}; + } + + n->id = std::move(id); + + if (curr().type() != lexeme::Semicolon) { + error("expected ; at end of using-statement"); + return {}; + } + next(); + + return n; + } + + + //G statement: + //G selection-statement + //G using-statement + //G inspect-expression + //G return-statement + //G jump-statement + //G iteration-statement + //G compound-statement + //G contract-statement + //G declaration + //G expression-statement + //G + //G contract-statement + //G contract ';' + // + //GTODO try-block + //G + auto statement( + bool semicolon_required = true, + source_position equal_sign = source_position{}, + bool parameters_allowed = false, + compound_statement_node* compound_parent = nullptr + ) + -> std::unique_ptr<statement_node> + { + if (!done() && curr().type() == lexeme::Semicolon) { + error("empty statement is not allowed - remove extra semicolon"); + return {}; + } + + auto n = std::make_unique<statement_node>(compound_parent); + + // If a parameter list is allowed here, try to parse one + if (parameters_allowed) { + n->parameters = parameter_declaration_list(false, true, false, true); + if (n->parameters) { + for (auto& param : n->parameters->parameters) { + if ( + param->direction() != passing_style::in + && param->direction() != passing_style::inout + && param->direction() != passing_style::copy + ) + { + error("(temporary alpha limitation) parameters scoped to a block/statement must be 'in' (the default), 'copy', or 'inout'", false); + return {}; + } + } + } + } + + // Now handle the rest of the statement + + if (auto s = selection_statement()) { + n->statement = std::move(s); + assert (n->is_selection()); + return n; + } + + else if (auto s = using_statement()) { + n->statement = std::move(s); + assert (n->is_using()); + return n; + } + + else if (auto i = inspect_expression(false)) { + n->statement = std::move(i); + assert (n->is_inspect()); + return n; + } + + else if (auto s = return_statement()) { + 
n->statement = std::move(s); + assert (n->is_return()); + return n; + } + + else if (auto s = jump_statement()) { + n->statement = std::move(s); + assert (n->is_jump()); + return n; + } + + else if (auto s = iteration_statement()) { + n->statement = std::move(s); + assert (n->is_iteration()); + return n; + } + + else if (auto s = compound_statement(equal_sign)) { + n->statement = std::move(s); + assert (n->is_compound()); + return n; + } + + else if (auto s = contract()) { + if (*s->kind != "assert") { + error("only 'assert' contracts are allowed at statement scope"); + return {}; + } + if (curr().type() != lexeme::Semicolon) { + error("missing ';' after contract-statement"); + return {}; + } + next(); + n->statement = std::move(s); + assert (n->is_contract()); + return n; + } + + else if (auto s = declaration(true, false, false, n.get())) { + n->statement = std::move(s); + assert (n->is_declaration()); + return n; + } + + else if (auto s = expression_statement(semicolon_required)) { + n->statement = std::move(s); + assert (n->is_expression()); + return n; + } + + else { + return {}; + } + } + + + //G compound-statement: + //G '{' statement-seq? 
'}' + //G + //G statement-seq: + //G statement + //G statement-seq statement + //G + auto compound_statement( + source_position equal_sign = source_position{}, + bool allow_single_unbraced_statement = false + ) + -> std::unique_ptr<compound_statement_node> + { + bool is_braced = curr().type() == lexeme::LeftBrace; + if ( + !is_braced + && !allow_single_unbraced_statement + ) + { + return {}; + } + + auto n = std::make_unique<compound_statement_node>(); + if (!is_braced) { + n->body_indent = curr().position().colno-1; + } + else if (peek(1)) { + n->body_indent = peek(1)->position().colno-1; + } + + // Remember current position, in case this isn't a valid statement + auto start_pos = pos; + + // In the case where this is a declaration initializer with + // = { + // on the same line, we want to remember our start position + // as where the = was, not where the { was + if (equal_sign.lineno == curr().position().lineno) { + n->open_brace = equal_sign; + } + else { + n->open_brace = curr().position(); + } + + if (is_braced) { + next(); + } + + while ( + curr().type() != lexeme::RightBrace + && ( + is_braced + || std::ssize(n->statements) < 1 + ) + ) + { + // Only inside a compound-statement, a + // contained statement() may have parameters + auto s = statement(true, source_position{}, true, n.get()); + if (!s) { + + // Only add error when no specific one already exist + if(!has_error()) { + error("invalid statement encountered inside a compound-statement", true); + } + pos = start_pos; // backtrack + return {}; + } + n->statements.push_back( std::move(s) ); + } + + if (is_braced) { + assert(curr().type() == lexeme::RightBrace); + n->close_brace = curr().position(); + next(); + } + return n; + } + + + //G parameter-declaration: + //G this-specifier? parameter-direction? 
declaration + //G + //G parameter-direction: one of + //G 'in' 'copy' 'inout' 'out' 'move' 'forward' + //G + //G this-specifier: + //G 'implicit' + //G 'virtual' + //G 'override' + //G 'final' + //G + auto parameter_declaration( + bool is_returns = false, + bool is_named = true, + bool is_template = true, + bool is_statement = false + ) + -> std::unique_ptr<parameter_declaration_node> + { + // Remember current position, because we may need to backtrack if this is just + // a parenthesized expression statement, not a statement parameter list + auto start_pos = pos; + + auto n = std::make_unique<parameter_declaration_node>(); + n->pass = + is_returns ? passing_style::out : + passing_style::in; + n->pos = curr().position(); + + // Handle optional this-specifier + // + if (curr() == "implicit") { + n->mod = parameter_declaration_node::modifier::implicit; + next(); + } + else if (curr() == "virtual") { + n->mod = parameter_declaration_node::modifier::virtual_; + next(); + } + else if (curr() == "override") { + n->mod = parameter_declaration_node::modifier::override_; + next(); + } + else if (curr() == "final") { + n->mod = parameter_declaration_node::modifier::final_; + next(); + } + + // Handle optional parameter-direction + // + if (auto dir = to_passing_style(curr()); + dir != passing_style::invalid + ) + { + if (is_template) { + error("a template parameter cannot have a passing style (it is always implicitly 'in')"); + return {}; + } + + if (is_returns) + { + if (dir == passing_style::in) { + error("a return value cannot be 'in'"); + return {}; + } + if (dir == passing_style::copy) { + error("a return value cannot be 'copy'"); + return {}; + } + if (dir == passing_style::inout) { + error("a return value cannot be 'inout'"); + return {}; + } + if (dir == passing_style::move) { + error("a return value cannot be 'move' (it is implicitly 'move'-out)"); + return {}; + } + } + if ( + !is_named + && dir == passing_style::out + ) + { + error("(temporary alpha limitation) an 
unnamed function cannot have an 'out' parameter"); + return {}; + } + n->pass = dir; + next(); + } + + // Now the main declaration + // + if (!(n->declaration = declaration(false, true, is_template))) { + pos = start_pos; // backtrack + return {}; + } + + // And some error checks + // + if ( + n->mod != parameter_declaration_node::modifier::none + && !n->declaration->has_name("this") + ) + { + error( "only a 'this' parameter may be declared implicit, virtual, override, or final", false ); + } + + if ( + n->declaration->has_name("this") + && n->pass != passing_style::in + && n->pass != passing_style::inout + && n->pass != passing_style::out + && n->pass != passing_style::move + ) + { + error( "a 'this' parameter must be in, inout, out, or move", false ); + } + + if ( + n->declaration->has_name("that") + && n->pass != passing_style::in + && n->pass != passing_style::move + ) + { + error( "a 'that' parameter must be in or move", false ); + } + + // The only parameter type that could be const-qualified is a 'copy' parameter, because + // only it is always truly its own variable, so it makes sense to let the user qualify it; + // all the other parameter types are conceptually (usually actually) bound to their args + if ( + !is_returns + && n->declaration->is_const() + && n->pass != passing_style::copy + && n->pass != passing_style::inout + ) + { + switch (n->pass) { + break;case passing_style::in: + error( "an 'in' parameter is always const, 'const' isn't needed and isn't allowed", false ); + break;case passing_style::inout: + // error( "an 'inout' parameter can't be const, if you do want it to be const then use 'in' instead", false ); + break;case passing_style::out: + error( "an 'out' parameter can't be const, otherwise it can't be initialized in the function body", false ); + break;case passing_style::move: + error( "a 'move' parameter can't be const, otherwise it can't be moved from in the function body", false ); + break;case passing_style::forward: + error( "a 
'forward' parameter shouldn't be const, because it passes along the argument's actual const-ness (and actual value category)", false ); + break;default: + assert (!"ICE: missing case"); + } + return {}; + } + + if ( + !is_returns + && !is_statement + && n->declaration->initializer + ) + { + error("Cpp2 is currently exploring the path of not allowing default arguments - use overloading instead", false); + return {}; + } + if (is_named && is_returns) { + auto tok = n->name(); + assert(tok); + if (tok->type() != lexeme::Identifier) { + error("expected identifier, not '" + tok->to_string() + "'", + false, tok->position()); + } + else if (n->declaration->has_wildcard_type()) { + error("return parameter '" + tok->to_string() + "' must have a type", + false, tok->position()); + } + } + return n; + } + + + //G parameter-declaration-list + //G '(' parameter-declaration-seq? ')' + //G + //G parameter-declaration-seq: + //G parameter-declaration + //G parameter-declaration-seq ',' parameter-declaration + //G + auto parameter_declaration_list( + bool is_returns = false, + bool is_named = true, + bool is_template = false, + bool is_statement = false + ) + -> std::unique_ptr<parameter_declaration_list_node> + { + // Remember current position, because we need to look ahead in + // the case of seeing whether a local statement starts with a + // parameter list, since finding that it doesn't (it's some other + // parenthesized expression) is not an error, just backtrack + auto start_pos = pos; + + auto opener = lexeme::LeftParen; + auto closer = lexeme::RightParen; + if (is_template) { + opener = lexeme::Less; + closer = lexeme::Greater; + } + + if (curr().type() != opener) { + return {}; + } + + auto n = std::make_unique<parameter_declaration_list_node>(); + n->open_paren = &curr(); + next(); + + auto param = std::make_unique<parameter_declaration_node>(); + + auto count = 1; + auto expect_another_param_decl = false; + + while ((param = parameter_declaration(is_returns, is_named, 
is_template, is_statement)) != nullptr) + { + expect_another_param_decl = false; + param->ordinal = count; + ++count; + + if ( + std::ssize(n->parameters) > 1 + && n->parameters.back()->has_name("that") + ) + { + error("'that' may not be followed by any additional parameters", false); + } + + n->parameters.push_back( std::move(param) ); + + if (curr().type() == closer) { + break; + } + else if (curr().type() != lexeme::Comma) { + if (is_statement) { + pos = start_pos; // backtrack + } + else { + error("expected ',' in parameter list", true, {}, true); + } + return {}; + } + + expect_another_param_decl = true; + next(); + } + + if (expect_another_param_decl) { + error("invalid parameter list: a comma must be followed by another parameter", true, {}, true); + } + + if (curr().type() != closer) { + if (is_statement) { + pos = start_pos; // backtrack + } + else { + error("invalid parameter list", true, {}, true); + } + return {}; + } + + n->close_paren = &curr(); + next(); + return n; + } + + + //G contract: + //G contract-kind contract-group? ':' '(' logical-or-expression ')' + //G contract-kind contract-group? ':' '(' logical-or-expression ',' expression ')' + //G + //G contract-group: + //G '<' id-expression contract-flags?'>' + //G + //G contract-flags: + //G ',' id-expression contract-flags? 
+ //G + //G contract-kind: one of + //G 'pre' 'post' 'assert' + //G + auto contract() + -> std::unique_ptr<contract_node> + { + auto n = std::make_unique<contract_node>(curr().position()); + auto guard = capture_groups_stack_guard(this, &n->captures); + + if ( + curr() != "pre" + && curr() != "post" + && curr() != "assert" + ) + { + return {}; + } + n->kind = &curr(); + next(); + + // Check if there's a <group,flags> + if (curr().type() == lexeme::Less) { + next(); + if (auto id = id_expression()) { + n->group = std::move(id); + } + else { + error("invalid contract group after '<'"); + return {}; + } + + // Now check if there's a list of flags + while (curr().type() == lexeme::Comma) { + next(); + if (auto id = id_expression()) { + n->flags.push_back( std::move(id) ); + } + else { + error("invalid contract tag in list"); + return {}; + } + } + + if (curr().type() != lexeme::Greater) { + error("expected '>' after contract group"); + return {}; + } + next(); + } + + if (curr().type() != lexeme::LeftParen) { + error("expected '(' before the contract condition"); + return {}; + } + next(); + + auto condition = logical_or_expression(); + if (!condition) { + error("invalid contract condition", true, {}, true); + return {}; + } + n->condition = std::move(condition); + + // Now check for the optional string message + if (curr().type() == lexeme::Comma) { + next(); + n->message = expression(); + if (!n->message) { + error("a contract violation message must be a valid string expression", true, {}, true); + return {}; + } + } + + if (curr().type() != lexeme::RightParen) { + error("expected ')' at the end of the contract"); + return {}; + } + next(); + + return n; + } + + + //G function-type: + //G parameter-declaration-list throws-specifier? return-list? contract-seq? + //G + //G throws-specifier: + //G 'throws' + //G + //G return-list: + //G expression-statement + //G '->' parameter-direction? 
//G         type-id
//G     '->' parameter-declaration-list
//G
//G contract-seq:
//G     contract
//G     contract-seq contract
//G
//  Parses a function type: parameter list, optional 'throws', optional
//  return specification, and any trailing pre/post contracts.
//
//  my_decl   the declaration this function type belongs to; handed to the
//            node constructor and used to name the function in a diagnostic
//  is_named  forwarded to parameter_declaration_list()
//
//  Returns null silently if no parameter list starts at the current token,
//  and null after reporting a diagnostic for the hard errors below.
//
auto function_type(
    declaration_node* my_decl,
    bool              is_named = true
    )
    -> std::unique_ptr<function_type_node>
{
    auto n = std::make_unique<function_type_node>( my_decl );

    //  Parameters
    auto parameters = parameter_declaration_list(false, is_named, false);
    if (!parameters) {
        return {};
    }
    n->parameters = std::move(parameters);

    //  Optional "throws"
    if (
        curr().type() == lexeme::Keyword
        && curr() == "throws"
        )
    {
        if (
            n->is_move()
            || n->is_swap()
            || n->is_destructor()
            )
        {
            error( "(experimental restriction) Cpp2 currently does not allow a move, swap, or destructor function to be designated 'throws'" );
            return {};
        }

        n->throws = true;
        next();
    }


    //  If we're not at a '->' or 'requires' or contract and what follows is
    //  an expression, this is a ":(params) expr" shorthand function syntax
    if (
        curr().type() != lexeme::Arrow
        && curr() != "requires"
        && (curr() != "pre" && curr() != "post")
        )
    {
        auto start_pos = pos;
        auto at_an_expression = expression() != nullptr;
        pos = start_pos;    // backtrack no matter what, we're just peeking here

        if (at_an_expression) {
            //  Record an empty type-id as the return type and flag the
            //  owning declaration as using the terse form
            n->returns = function_type_node::single_type_id{ std::make_unique<type_id_node>() };
            assert(n->returns.index() == function_type_node::id);
            n->my_decl->terse_no_equals = true;
            return n;
        }
    }


    //  Optional returns
    if (curr().type() == lexeme::Arrow)
    {
        next();

        //  Case 1: '->' passing-style type-id
        if (auto pass = to_passing_style(curr());
            pass != passing_style::invalid
            )
        {
            //  A disallowed style is diagnosed, but parsing continues
            if (
                pass != passing_style::forward
                && pass != passing_style::move
                )
            {
                error("only 'forward' and 'move' return passing style are allowed from functions");
            }
            next();
            if (auto t = type_id()) {
                n->returns = function_type_node::single_type_id{ std::move(t), pass };
                assert(n->returns.index() == function_type_node::id);
            }
            else {
                auto msg = std::string("'");
                msg += to_string_view(pass);
                error(msg + "' must be followed by a type-id");
            }
        }

        //  Case 2: '->' type-id, recorded with passing style 'move'
        else if (auto t = type_id())
        {
            if (
                t->get_token()
                && t->get_token()->to_string() == "auto"
                )
            {
                //  "v" is the placeholder name when the declaration has none
                auto name = std::string{"v"};
                if (my_decl && my_decl->name()) {
                    name = my_decl->name()->to_string();
                }
                errors.emplace_back(
                    curr().position(),
                    "to define a function " + name + " with deduced return type, write '" + name + ": ( /* arguments */ ) -> _ = { /* function body */ }'"
                );
                return {};
            }
            n->returns = function_type_node::single_type_id{ std::move(t), passing_style::move };
            assert(n->returns.index() == function_type_node::id);
        }

        //  Case 3: '->' ( named return list )
        else if (auto returns_list = parameter_declaration_list(true, is_named))
        {
            if (std::ssize(returns_list->parameters) < 1) {
                error("an explicit return value list cannot be empty");
                return {};
            }
            n->returns = std::move(returns_list);
            assert(n->returns.index() == function_type_node::list);
        }

        else
        {
            error("missing function return after ->");
            return {};
        }
    }

    //  Pre/post conditions
    while (auto c = contract())
    {
        if (
            *c->kind != "pre"
            && *c->kind != "post"
            )
        {
            error("only 'pre' and 'post' contracts are allowed on functions");
            return {};
        }
        n->contracts.push_back( std::move(c) );
    }

    return n;
}


//  Declared here and defined elsewhere; called on type declarations by
//  unnamed_declaration(), which treats a false result as an error
auto apply_type_metafunctions( declaration_node& decl )
    -> bool;


//G unnamed-declaration:
//G     ':' meta-functions-list? template-parameter-declaration-list? function-type requires-clause? '=' statement
//G     ':' meta-functions-list? template-parameter-declaration-list? function-type statement
//G     ':' meta-functions-list? template-parameter-declaration-list? type-id? requires-clause? '=' statement
//G     ':' meta-functions-list? template-parameter-declaration-list? type-id
//G     ':' meta-functions-list? template-parameter-declaration-list? 'final'? 'type' requires-clause?
'=' statement + //G ':' 'namespace' '=' statement + //G + //G meta-functions-list: + //G '@' id-expression + //G meta-functions-list '@' id-expression + //G + //G requires-clause: + //G # note: for aliases, == is not allowed in expressions until new ( is opened + //G 'requires' logical-or-expression + //G + //G template-parameter-declaration-list + //G '<' parameter-declaration-seq '>' + //G + auto unnamed_declaration( + source_position start, + bool semicolon_required = true, + bool captures_allowed = false, + bool named = false, + bool is_parameter = false, + bool is_template_parameter = false, + std::unique_ptr<unqualified_id_node> id = {}, + accessibility access = {}, + bool is_variadic = false, + statement_node* my_stmt = {} + ) + -> std::unique_ptr<declaration_node> + { + auto n = std::make_unique<declaration_node>( current_declarations.back() ); + n->pos = start; + + n->identifier = std::move(id); + n->access = access; + n->is_variadic = is_variadic; + n->my_statement = my_stmt; + + // If we're in a type scope and the next token is ';', treat this as if + // ': _;' without an initializer. + // This is for type metafunctions that want to use the incomplete name-only + // declaration, and transform it to something else. If unchanged the + // incomplete declaration will be rejected later by sema.check rule. 
+ if ( + n->parent_is_type() + && curr().type() == lexeme::Semicolon + ) + { + n->type = std::make_unique<type_id_node>(); + assert (n->is_object()); + next(); + return n; + } + + // For a template parameter, ':' is not required and + // we default to ': type' + if ( + is_template_parameter + && curr().type() != lexeme::Colon + ) + { + // So invent the "type" token + generated_text.push_back("type"); + generated_tokens->push_back({ + generated_text.back().c_str(), + std::ssize(generated_text.back()), + start, + lexeme::Identifier + }); + + // So we can create the type_node + + auto t = std::make_unique<type_node>( &generated_tokens->back() ); + + n->type = std::move(t); + assert (n->is_type()); + + // That's it, we're done here + return n; + } + + // For 'this' and 'that' parameters ':' is not allowed and we'll use the default ': _' + if ( + n->identifier + && is_parameter + && ( + *n->identifier->identifier == "this" + || *n->identifier->identifier == "that" + ) + && curr().type() == lexeme::Colon + ) + { + error("a 'this' or 'that' parameter knows its type, no ':' is allowed here", false); + return {}; + } + + // For an ordinary parameter, ':' is not required and + // we default to ': _' - i.e., deduced with no initializer + if ( + is_parameter + && curr().type() != lexeme::Colon + ) + { + // So invent the "_" token + generated_text.push_back("_"); + generated_tokens->push_back({ + generated_text.back().c_str(), + std::ssize(generated_text.back()), + start, + lexeme::Identifier + }); + + // So we can create the typeid_id_node and its unqualified_id_node + + auto gen_id = std::make_unique<unqualified_id_node>(); + gen_id->identifier = &generated_tokens->back(); + + auto type = std::make_unique<type_id_node>(); + type->pos = start; + type->id = std::move(gen_id); + + n->type = std::move(type); + assert (n->is_object()); + + // That's it, we're done here + return n; + } + + // Otherwise, the next token must be ':' + if (curr().type() != lexeme::Colon) { + return {}; 
+ } + next(); + + if (curr() == "union") { + error("unsafe 'union' is not supported in Cpp2 - write '@union' to apply Cpp2's safe 'union' type metafunction instead, or use std::variant"); + } + + // Next is an optional metafunctions clause + while (curr() == "@") { + next(); + auto idx = id_expression(); + if (!idx) { + error("'@' must be followed by a metafunction name", false); + return {}; + } + n->metafunctions.push_back( std::move(idx) ); + } + + // Next is an optional template parameter list + if (curr().type() == lexeme::Less) { + auto template_parameters = parameter_declaration_list(false, false, true); + if (!template_parameters) { + error("invalid template parameter list"); + return {}; + } + n->template_parameters = std::move(template_parameters); + } + + auto guard = + captures_allowed + ? std::make_unique<capture_groups_stack_guard>(this, &n->captures) + : std::unique_ptr<capture_groups_stack_guard>() + ; + + auto guard2 = current_declarations_stack_guard(this, n.get()); + + // Next is an an optional type + + auto deduced_type = false; + + // It could be "type", declaring a user-defined type + if ( + curr() == "type" + || ( + curr() == "final" + && peek(1) && *peek(1) == "type" + ) + ) + { + n->type = std::make_unique<type_node>( &curr(), curr() == "final" ); + + if (curr() == "final") { + next(); + } + next(); + + if ( + is_parameter + && !is_template_parameter + ) + { + error("a normal parameter cannot be a 'type' - did you mean to put this in a < > template parameter list?"); + return {}; + } + assert (n->is_type()); + } + + // Or a function type, declaring a function - and tell the function whether it's in a user-defined type + else if (auto t = function_type(n.get(), named)) + { + n->type = std::move(t); + assert (n->is_function()); + + if (!n->metafunctions.empty()) { + errors.emplace_back( + n->metafunctions.front()->position(), + "(temporary alpha limitation) metafunctions are currently not supported on functions, only on types" + ); + return 
{}; + } + } + + // Or a namespace + else if (curr() == "namespace") + { + n->type = std::make_unique<namespace_node>( &curr() ); + assert (n->type.index() == declaration_node::a_namespace); + next(); + + if (!n->metafunctions.empty()) { + errors.emplace_back( + n->metafunctions.front()->position(), + "(temporary alpha limitation) metafunctions are currently not supported on namespaces, only on types" + ); + return {}; + } + } + + // Or just a type-id, declaring a non-pointer object + else if (auto t = type_id()) + { + if ( + t->get_token() + && t->get_token()->to_string() == "auto" + ) + { + auto name = std::string{"v"}; + if (n->name()) { + name = n->name()->to_string(); + } + errors.emplace_back( + curr().position(), + "to define a variable " + name + " with deduced type, write '" + name + " := /* initializer */;'" + ); + return {}; + } + + n->type = std::move(t); + assert (n->is_object()); + + if (!n->metafunctions.empty()) { + errors.emplace_back( + n->metafunctions.front()->position(), + "(temporary alpha limitation) metafunctions are currently not supported on objects, only on types" + ); + return {}; + } + + if (curr().type() == lexeme::LeftBracket) { + error("C-style array types are not allowed, use std::array instead"); + return {}; + } + } + + // Or nothing, declaring an object of deduced type, + // which we'll represent using an empty type-id + else { + n->type = std::make_unique<type_id_node>(); + assert (n->is_object()); + deduced_type = true; + } + + // If we've already validated that this is a function where the parameter + // list is followed by a valid expression-statement, parse that again + // (requiring a semicolon as we validated when determining terse_no_equals) + if (n->terse_no_equals) + { + n->equal_sign = curr().position(); + n->initializer = statement(/*ignore semicolon_required*/ false, n->equal_sign); + assert( n->initializer && "ICE: should have already validated that there's a valid expression-statement here" ); + } + + else + { + // 
Next is optionally a requires clause (if not using the "-> expr;" syntax) + if (curr() == "requires") + { + if ( + n->is_type() + && !n->template_parameters + ) + { + error("'requires' is not allowed on a type that does not have a template parameter list"); + return {}; + } + + if (n->is_namespace()) + { + error("'requires' is not allowed on a namespace"); + return {}; + } + + n->requires_pos = curr().position(); + next(); + auto e = logical_or_expression(); + if (!e) { + error("'requires' must be followed by an expression"); + return {}; + } + n->requires_clause_expression = std::move(e); + } + + // Next is optionally = or == followed by an initializer + + // If there is no = or == + if ( + !done() + && curr().type() != lexeme::Assignment + && curr().type() != lexeme::EqualComparison + ) + { + if ( + n->is_type() + && !is_template_parameter + ) + { + error("a user-defined type must have an = initializer"); + return {}; + } + + // Then there may be a semicolon + // If there is a semicolon, eat it + if (!done() && curr().type() == lexeme::Semicolon) { + next(); + } + // But if there isn't one and it was required, diagnose an error + else if (semicolon_required) { + if (curr().type() == lexeme::LeftBrace) { + error("expected '=' before '{' - did you mean '= {' ?", true, {}, true); + } + else { + error("missing ';' at end of declaration or '=' at start of initializer", true, {}, true); + } + return {}; + } + } + + // There was an = or ==, so eat it and continue + else + { + n->equal_sign = curr().position(); + + if (curr().type() == lexeme::EqualComparison) { + if (!n->is_function()) { + error("syntax error at '==' - did you mean '='?"); + } + n->is_constexpr = true; + } + + next(); + + if (auto t = std::get_if<declaration_node::an_object>(&n->type); + t + && (*t)->is_pointer_qualified() + ) + { + if ( + curr() == "nullptr" + || isdigit(std::string_view(curr())[0]) + || ( + curr() == "(" + && peek(1) + && *peek(1) == ")" + ) + ) + { + error("pointer cannot be 
initialized to null or int - leave it uninitialized and then set it to a non-null value when you have one"); + violates_lifetime_safety = true; + throw std::runtime_error("null initialization detected"); + } + } + + // deduced_type == true means that the type will be deduced, + // represented using an empty type-id + if ( + deduced_type + && peek(1) + ) + { + auto& type = std::get<declaration_node::an_object>(n->type); + // object initialized by the address of the curr() object + if (peek(1)->type() == lexeme::Ampersand) { + type->address_of = &curr(); + } + // object initialized by (potentially multiple) dereference of the curr() object + else if (peek(1)->type() == lexeme::Multiply) { + type->dereference_of = &curr(); + for (int i = 1; peek(i)->type() == lexeme::Multiply; ++i) + type->dereference_cnt += 1; + } + else if ( + // object initialized by the result of the function call (and it is not unnamed function) + (peek(1)->type() == lexeme::LeftParen && curr().type() != lexeme::Colon) + || curr().type() == lexeme::Identifier // or by the object (variable that the type need to be checked) + ) { + type->suspicious_initialization = &curr(); + } + } + + if (!(n->initializer = statement(semicolon_required, n->equal_sign))) { + error( + "ill-formed initializer", + true, {}, true + ); + next(); + return {}; + } + } + + } + + // A type initializer must be a compound expression + if ( + n->is_type() + && !is_parameter + && ( + !n->initializer + || !n->initializer->is_compound() + ) + ) + { + errors.emplace_back( + n->position(), + "a user-defined type initializer must be a compound-expression consisting of declarations" + ); + return {}; + } + + // If this is a type with metafunctions, apply those + if (n->is_type()) { + if (!apply_type_metafunctions(*n)) { + error( + "error encountered while applying type metafunctions", + false, {}, true + ); + return {}; + } + } + + if ( + n->is_function() + && n->initializer + && !done() && curr().type() == lexeme::Semicolon + ) + { 
+ if (n->initializer->is_compound() && n->has_name()) { + error("a braced function body may not be followed by a semicolon (empty statements are not allowed)"); + return {}; + } else if (n->initializer->is_expression()) { + error("a single-expression function should end with a single semicolon"); + return {}; + } + } + + // If this is a function with a list of multiple/named return values, + // and the function body's end doesn't already have "return" as the + // last statement, then generate "return;" as the last statement + if (auto func = std::get_if<declaration_node::a_function>(&n->type); + func + && n->initializer + && (*func)->returns.index() == function_type_node::list + ) + { + if (!n->initializer->is_compound()) { + error( + "a function with named return value(s) must have a full { } body", + false, + {}, + true + ); + return {}; + } + + auto& body = std::get<statement_node::compound>(n->initializer->statement); + + if ( + body->statements.empty() + || !body->statements.back()->is_return() + ) + { + auto last_pos = n->position(); + if (!body->statements.empty()) { + last_pos = body->statements.back()->position(); + } + ++last_pos.lineno; + generated_tokens->emplace_back( "return", last_pos, lexeme::Keyword); + + auto ret = std::make_unique<return_statement_node>(); + ret->identifier = &generated_tokens->back(); + + auto stmt = std::make_unique<statement_node>(); + stmt->statement = std::move(ret); + + body->statements.push_back(std::move(stmt)); + } + } + + // If this is a function, record its extents + if (n->is_function()) { + function_body_extents.emplace_back( + n->equal_sign.lineno, + peek(-1)->position().lineno + ); + } + + return n; + } + + + //G alias: + //G ':' template-parameter-declaration-list? 'type' requires-clause? '==' type-id ';' + //G ':' 'namespace' '==' id-expression ';' + //G ':' template-parameter-declaration-list? type-id? requires-clause? 
'==' expression ';' + //G + //GT ':' function-type '==' expression ';' + //GT # See commit 63efa6ed21c4d4f4f136a7a73e9f6b2c110c81d7 comment + //GT # for why I don't see a need to enable this yet + // + auto alias() + -> std::unique_ptr<declaration_node> + { + // Remember current position, because we need to look ahead + auto start_pos = pos; + + auto n = std::make_unique<declaration_node>( current_declarations.back() ); + + if (curr().type() != lexeme::Colon) { + return {}; + } + next(); + + // Next is an optional template parameter list + if (curr().type() == lexeme::Less) { + auto template_parameters = parameter_declaration_list(false, false, true); + if (!template_parameters) { + pos = start_pos; // backtrack + return {}; + } + n->template_parameters = std::move(template_parameters); + } + + auto a = std::make_unique<alias_node>( &curr() ); + + // Next must be 'type', 'namespace', a type-id, or we're at the 'requires' or '==' + if (curr() == "type") + { + next(); + } + else if (curr() == "namespace") + { + next(); + if (n->template_parameters) { + errors.emplace_back( + curr().position(), + "a namespace or namespace alias cannot have template parameters" + ); + return {}; + } + } + else if (curr().type() != lexeme::EqualComparison && curr() != "requires") + { + a->type_id = type_id(); + if (!a->type_id) { + pos = start_pos; // backtrack + return {}; + } + } + + // Next is optionally a requires clause + if (curr() == "requires") + { + if ( + n->is_type_alias() + && !n->template_parameters + ) + { + error("'requires' is not allowed on a type alias that does not have a template parameter list"); + return {}; + } + + if (n->is_namespace_alias()) + { + error("'requires' is not allowed on a namespace alias"); + return {}; + } + + n->requires_pos = curr().position(); + next(); + auto e = logical_or_expression(true, false); + if (!e) { + error("'requires' must be followed by an expression"); + return {}; + } + n->requires_clause_expression = std::move(e); + } + + // Now 
we should be at the '==' if this is an alias + + if (curr().type() == lexeme::EqualComparison) { + next(); + } + else { + if (a->type->type() != lexeme::EqualComparison) { + pos = start_pos; // backtrack + return {}; + } + } + assert(peek(-1)->type() == lexeme::EqualComparison); + + if ( + n->parent_is_type() + && *a->type == "namespace" + ) + { + errors.emplace_back( + curr().position(), + "a namespace alias cannot appear in a type scope" + ); + return {}; + } + + // Finally, pick up the initializer + + // Type alias + if (*a->type == "type") + { + auto t = type_id(); + if (!t) { + errors.emplace_back( + curr().position(), + "a 'type ==' alias declaration must be followed by a type name" + ); + return {}; + } + if ( + t->is_wildcard() + || ( t->get_token() && t->get_token()->to_string() == "auto" ) + ) { + errors.emplace_back( + curr().position(), + "a 'type ==' alias declaration must be followed by a type name (not a wildcard _ nor auto)" + ); + return {}; + } + a->initializer = std::move(t); + } + + // Namespace alias + else if (*a->type == "namespace") + { + if (auto qid = id_expression()) { + a->initializer = std::move(qid); + } + else { + errors.emplace_back( + curr().position(), + "a 'namespace ==' alias declaration must be followed by a namespace name (id-expression)" + ); + return {}; + } + } + + // Object alias + else if ( + a->type_id + || a->type->type() == lexeme::EqualComparison + ) + { + auto e = expression(); + if (!e) { + errors.emplace_back( + curr().position(), + "an object '==' alias declaration must be followed by an expression" + ); + return {}; + } + a->initializer = std::move(e); + } + + // Anything else shouldn't be possible + else { + assert(!"ICE: should be unreachable - invalid alias declaration"); + return {}; + } + + // And the final ceremonial semicolon + if (curr() != ";") { + errors.emplace_back( + curr().position(), + "';' expected at end of alias declaration" + ); + return {}; + } + next(); + + n->type = std::move(a); + + return 
n; + } + + + //G declaration: + //G access-specifier? identifier '...'? unnamed-declaration + //G access-specifier? identifier alias + //G + //G access-specifier: + //G public + //G protected + //G private + //G + auto declaration( + bool semicolon_required = true, + bool is_parameter = false, + bool is_template_parameter = false, + statement_node* my_stmt = {} + ) + -> std::unique_ptr<declaration_node> + { + if (done()) { return {}; } + + // Remember current position, because we need to look ahead + auto start_pos = pos; + + auto n = std::unique_ptr<declaration_node>{}; + + // This scope is to ensure that once we've moved 'id' into the + // declaration_node, we don't access the moved-from local name + // (and similar hygiene for 'access' though that one doesn't matter as much) + // The reason to move 'id' into unnamed_declaration() is so that + // it can conveniently perform some checks that refer to the name + { + auto access = accessibility::default_; + if (curr() == "public") { + access = accessibility::public_; + next(); + } + else if (curr() == "protected") { + access = accessibility::protected_; + next(); + } + else if (curr() == "private") { + access = accessibility::private_; + next(); + } + + // If they wrote an access-specifier, see if they put a ':' + // after it out of Cpp1 habit (there's no colon in Cpp2) + if ( + access != accessibility::default_ + && curr().type() == lexeme::Colon + ) + { + errors.emplace_back( + curr().position(), + "':' is not allowed after an access-specifier" + ); + return {}; + } + + auto id = unqualified_id(); + if (!id) { + return {}; + } + + if (id->to_string() == "...") { + errors.emplace_back( + curr().position(), + "a variadic declaration must have a name - did you forget to write a name before '...'?" 
+ ); + pos = start_pos; // backtrack + } + + auto is_variadic = false; + if (curr().type() == lexeme::Ellipsis) { + is_variadic = true; + next(); + } + + // Provide some useful Cpp1->Cpp2 migration diagnostics for common mistakes + // + if ( + id->get_token() + && *id->get_token() == "auto" + && curr().type() != lexeme::Colon + ) + { + auto name = std::string{"v"}; + if (peek(0) && peek(0)->type() == lexeme::Identifier) { + name = peek(0)->to_string(); + } + errors.emplace_back( + curr().position(), + "to define a variable " + name + " of type T, write '" + name + ": T = /* initializer */'" + ); + return {}; + } + if ( + id->get_token() + && *id->get_token() == "namespace" + && curr().type() != lexeme::Colon + ) + { + auto name = std::string{"N"}; + if (peek(0)) { + name = peek(0)->to_string(); + } + errors.emplace_back( + curr().position(), + "to define a namespace " + name + ", write '" + name + " : namespace = { /*contents*/ }'" + ); + return {}; + } + if ( + id->get_token() + && ( + *id->get_token() == "class" + || *id->get_token() == "struct" + ) + && curr().type() != lexeme::Colon + ) + { + auto name = std::string{"C"}; + if (peek(0)) { + name = peek(0)->to_string(); + } + errors.emplace_back( + curr().position(), + "to define a type " + name + ", write '" + name + " : type = { /*body*/ }'" + ); + return {}; + } + + // Now proceed... 
+ // + + // First see if it's an alias declaration + n = alias(); + if (n) { + if (is_parameter) { + errors.emplace_back( + curr().position(), + "a parameter declaration may not be an alias declaration" + ); + return {}; + } + + if (is_variadic) { + errors.emplace_back( + curr().position(), + "an alias declaration may not be variadic" + ); + return {}; + } + + n->pos = start_pos; + n->identifier = std::move(id); + n->access = access; + return n; + } + + // Otherwise, this is a normal declaration + n = unnamed_declaration( + start_pos, + semicolon_required, + false, + true, + is_parameter, + is_template_parameter, + std::move(id), + access, + is_variadic, + my_stmt + ); + if (!n) { + pos = start_pos; // backtrack + return {}; + } + } + + // Note: Do this after trying to parse this as a declaration, for parse backtracking + + if ( + *n->identifier->identifier == "that" + && ( + !is_parameter + || is_template_parameter + ) + ) + { + errors.emplace_back( + n->identifier->position(), + "'that' may only be declared as an ordinary function parameter" + ); + return {}; + } + + // Cache some context + n->is_template_parameter = is_template_parameter; + n->is_parameter = is_parameter; + + return n; + } + + + //G declaration-seq: + //G declaration + //G declaration-seq declaration + //G + //G translation-unit: + //G declaration-seq? 
+ // + auto translation_unit() + -> std::unique_ptr<translation_unit_node> + { + auto n = std::make_unique<translation_unit_node>(); + for (auto d = declaration(); d; d = declaration()) { + n->declarations.push_back( std::move(d) ); + } + return n; + } + +public: + //----------------------------------------------------------------------- + // debug_print + // + auto debug_print(std::ostream& o) + -> void; +}; + + +//----------------------------------------------------------------------- +// +// Common parts for printing visitors +// +//----------------------------------------------------------------------- +// +struct printing_visitor +{ + //----------------------------------------------------------------------- + // Constructor: remember a stream to write to + // + std::ostream& o; + + printing_visitor(std::ostream& out) : o{out} { indent_spaces = 2; } +}; + + +//----------------------------------------------------------------------- +// +// Visitor for printing a parse tree +// +//----------------------------------------------------------------------- +// +class parse_tree_printer : printing_visitor +{ + using printing_visitor::printing_visitor; + +public: + auto start(token const& n, int indent) -> void + { + o << pre(indent) << _as<std::string>(n.type()) << ": " << n.to_string() << "\n"; + } + + auto start(literal_node const&, int indent) -> void + { + o << pre(indent) << "literal" << "\n"; + } + + auto start(expression_node const& n, int indent) -> void + { + o << pre(indent) << "expression - " + << n.num_subexpressions << " subexpressions, my_statement [" + << static_cast<void const*>(n.my_statement) << "]\n"; + } + + auto start(expression_list_node::term const&n, int indent) -> void + { + o << pre(indent) << "expression-list term\n"; + if (n.pass == passing_style::out) { + o << pre(indent+1) << "out\n"; + } + } + + auto start(expression_list_node const&, int indent) -> void + { + o << pre(indent) << "expression-list\n"; + } + + auto 
start(primary_expression_node const&, int indent) -> void + { + o << pre(indent) << "primary-expression\n"; + } + + auto start(prefix_expression_node const&, int indent) -> void + { + o << pre(indent) << "prefix-expression\n"; + } + + auto start(is_as_expression_node const&, int indent) -> void + { + o << pre(indent) << "is-as-expression\n"; + } + + template<String Name, typename Term> + auto start(binary_expression_node<Name, Term> const&, int indent) -> void + { + o << pre(indent) << Name.value << "-expression\n"; + } + + auto start(expression_statement_node const& n, int indent) -> void + { + o << pre(indent) << "expression-statement - [" << static_cast<void const*>(&n) << "]\n"; + } + + auto start(postfix_expression_node const&, int indent) -> void + { + o << pre(indent) << "postfix-expression\n"; + } + + auto start(unqualified_id_node const&, int indent) -> void + { + o << pre(indent) << "unqualified-id\n"; + } + + auto start(qualified_id_node const&, int indent) -> void + { + o << pre(indent) << "qualified-id\n"; + } + + auto start(type_id_node const&, int indent) -> void + { + o << pre(indent) << "type-id\n"; + } + + auto start(id_expression_node const&, int indent) -> void + { + o << pre(indent) << "id-expression\n"; + } + + auto start(statement_node const&, int indent) -> void + { + o << pre(indent) << "statement\n"; + } + + auto start(compound_statement_node const&, int indent) -> void + { + o << pre(indent) << "compound-statement\n"; + } + + auto start(selection_statement_node const& n, int indent) -> void + { + o << pre(indent) << "selection-statement\n"; + o << pre(indent+1) << "is_constexpr: " << _as<std::string>(n.is_constexpr) << "\n"; + } + + auto start(alternative_node const&, int indent) -> void + { + o << pre(indent) << "alternative\n"; + } + + auto start(jump_statement_node const&, int indent) -> void + { + o << pre(indent) << "jump\n"; + } + + auto start(using_statement_node const& n, int indent) -> void + { + o << pre(indent) << "using" << 
(n.for_namespace? " namespace" : "") << "\n"; + } + + auto start(inspect_expression_node const& n, int indent) -> void + { + o << pre(indent) << "inspect-expression\n"; + o << pre(indent+1) << "is_constexpr: " << _as<std::string>(n.is_constexpr) << "\n"; + } + + auto start(return_statement_node const&, int indent) -> void + { + o << pre(indent) << "return-statement\n"; + } + + auto start(iteration_statement_node const& n, int indent) -> void + { + o << pre(indent) << "iteration-statement\n"; + assert(n.identifier); + o << pre(indent+1) << "identifier: " << std::string_view(*n.identifier) << "\n"; + } + + auto start(contract_node const& n, int indent) -> void + { + o << pre(indent) << "contract\n"; + assert(n.kind); + o << pre(indent+1) << "kind: " << std::string_view(*n.kind) << "\n"; + if (!n.captures.members.empty()) { + o << pre(indent+1) << "captures: " << n.captures.members.size() << "\n"; + } + } + + auto start(type_node const&, int indent) -> void + { + o << pre(indent) << "user-defined type\n"; + } + + auto start(namespace_node const&, int indent) -> void + { + o << pre(indent) << "namespace\n"; + } + + auto start(function_type_node const& n, int indent) -> void + { + o << pre(indent) << "function\n"; + o << pre(indent+1) << "throws: " << _as<std::string>(n.throws) << "\n"; + if (n.returns.index() == function_type_node::id) { + auto& r = std::get<function_type_node::id>(n.returns); + if (r.pass != passing_style::invalid) { + o << pre(indent+1) << "returns by: " << to_string_view(r.pass) << "\n"; + } + } + } + + auto start(function_returns_tag const&, int indent) -> void + { + o << pre(indent) << "function returns\n"; + } + + auto start(template_args_tag const&, int indent) -> void + { + o << pre(indent) << "template arguments\n"; + } + + auto start(declaration_identifier_tag const&, int indent) -> void + { + o << pre(indent) << "declaration identifier\n"; + } + + auto start(next_expression_tag const&, int indent) -> void + { + o << pre(indent) << "next 
expression\n"; + } + + auto start(alias_node const& n, int indent) -> void + { + o << pre(indent) << "alias\n"; + switch (n.initializer.index()) { + break;case alias_node::a_type: + o << pre(indent+1) << "type\n"; + break;case alias_node::a_namespace: + o << pre(indent+1) << "namespace\n"; + break;case alias_node::an_object: + o << pre(indent+1) << "object\n"; + break;default: + o << pre(indent+1) << "ICE - invalid variant state\n"; + } + } + + auto start(declaration_node const& n, int indent) -> void + { + o << pre(indent) << "declaration [" << &n << "]\n"; + o << pre(indent+1) << "parent: [" << n.parent_declaration << "]\n"; + o << pre(indent+1) << "is_variadic: [" << std::boolalpha << n.is_variadic << "]\n"; + o << pre(indent+1) << "is_constexpr: " << _as<std::string>(n.is_constexpr) << "\n"; + switch (n.type.index()) { + break;case declaration_node::a_function: + o << pre(indent+1) << "function\n"; + break;case declaration_node::an_object: + o << pre(indent+1) << "object\n"; + break;case declaration_node::a_type: + o << pre(indent+1) << "type\n"; + break;case declaration_node::a_namespace: + o << pre(indent+1) << "namespace\n"; + break;case declaration_node::an_alias: + o << pre(indent+1) << "alias\n"; + break;default: + o << pre(indent+1) << "ICE - invalid variant state\n"; + } + if (!n.is_default_access()) { + o << pre(indent+1) << "access: " << to_string(n.access) << "\n"; + } + if (!n.captures.members.empty()) { + o << pre(indent+1) << "captures: " << n.captures.members.size() << "\n"; + } + } + + auto start(parameter_declaration_node const& n, int indent) -> void + { + o << pre(indent) << "parameter-declaration\n"; + + o << pre(indent+1); + switch (n.pass) { + break;case passing_style::in : o << "in"; + break;case passing_style::copy : o << "copy"; + break;case passing_style::inout : o << "inout"; + break;case passing_style::out : o << "out"; + break;case passing_style::move : o << "move"; + break;case passing_style::forward: o << "forward"; + 
break;default: ; + } + + o << pre(indent+1); + switch (n.mod) { + break;case parameter_declaration_node::modifier::implicit : o << "implicit"; + break;case parameter_declaration_node::modifier::virtual_ : o << "virtual"; + break;case parameter_declaration_node::modifier::override_ : o << "override"; + break;case parameter_declaration_node::modifier::final_ : o << "final"; + break;default: ; + } + o << "\n"; + + assert( n.declaration ); + } + + auto start(parameter_declaration_list_node const&, int indent) -> void + { + o << pre(indent) << "parameter-declaration-list\n"; + } + + auto start(translation_unit_node const&, int indent) -> void + { + o << pre(indent) << "translation-unit\n"; + } + + auto start(auto const&, int indent) -> void + { + o << pre(indent) << "UNRECOGNIZED -- FIXME\n"; + } + + auto end(auto const&, int) -> void + { + // Ignore other node types + } +}; + + +auto parser::debug_print(std::ostream& o) + + -> void +{ + o << "\n\n--- Parse tree\n"; + + auto tree_printer = parse_tree_printer{o}; + visit( tree_printer ); + + o << "\n\n--- Function body extents\n"; + + for (auto const& f : function_body_extents) { + o << " " << f.first << "-" << f.last << "\n"; + } +} + + +} + +#endif diff --git a/CompilerDriver/cc2/source/reflect.h b/CompilerDriver/cc2/source/reflect.h new file mode 100644 index 0000000..1cb66f7 --- /dev/null +++ b/CompilerDriver/cc2/source/reflect.h @@ -0,0 +1,1965 @@ + +#ifndef REFLECT_H_CPP2 +#define REFLECT_H_CPP2 + + +//=== Cpp2 type declarations ==================================================== + + +#include "cpp2util.h" + +#line 1 "reflect.h2" + +#line 20 "reflect.h2" +namespace cpp2 { + +namespace meta { + +#line 32 "reflect.h2" +class compiler_services; + +#line 223 "reflect.h2" +class declaration_base; + +#line 249 "reflect.h2" +class declaration; + +#line 331 "reflect.h2" +class function_declaration; + +#line 418 "reflect.h2" +class object_declaration; + +#line 454 "reflect.h2" +class type_declaration; + +#line 589 
"reflect.h2" +class alias_declaration; + +#line 928 "reflect.h2" +class value_member_info; + +#line 1445 "reflect.h2" +} + +} + + +//=== Cpp2 type definitions and function declarations =========================== + +#line 1 "reflect.h2" + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +//=========================================================================== +// Reflection and meta +//=========================================================================== + +#include "parse.h" + +#line 20 "reflect.h2" +namespace cpp2 { + +namespace meta { + +#line 25 "reflect.h2" +//----------------------------------------------------------------------- +// +// Compiler services +// +//----------------------------------------------------------------------- +// + +class compiler_services + { + // Common data members + // + private: std::vector<error_entry>* errors; + private: int errors_original_size; + private: std::deque<token>* generated_tokens; + private: cpp2::parser parser; + private: std::string metafunction_name {}; + private: std::vector<std::string> metafunction_args {}; + private: bool metafunctions_used {false}; + + // Constructor + // + public: explicit compiler_services( + + std::vector<error_entry>* errors_, + std::deque<token>* generated_tokens_ + ); + +#line 58 "reflect.h2" + // Common API + // + public: auto set_metafunction_name(cpp2::in<std::string_view> name, cpp2::in<std::vector<std::string>> args) & -> void; + +#line 66 "reflect.h2" + 
public: [[nodiscard]] auto get_metafunction_name() const& -> std::string_view; + + public: [[nodiscard]] auto get_argument(cpp2::in<int> index) & -> std::string; + +#line 76 "reflect.h2" + public: [[nodiscard]] auto get_arguments() & -> std::vector<std::string>; + +#line 81 "reflect.h2" + public: [[nodiscard]] auto arguments_were_used() const& -> bool; +using parse_statement_ret = std::unique_ptr<statement_node>; + + +#line 83 "reflect.h2" + protected: [[nodiscard]] auto parse_statement( + + std::string_view source + ) & -> parse_statement_ret; + +#line 136 "reflect.h2" + public: [[nodiscard]] virtual auto position() const -> source_position; + +#line 142 "reflect.h2" + // Error diagnosis and handling, integrated with compiler output + // Unlike a contract violation, .requires continues further processing + // + public: auto require( + + cpp2::in<bool> b, + cpp2::in<std::string_view> msg + ) const& -> void; + +#line 156 "reflect.h2" + public: auto error(cpp2::in<std::string_view> msg) const& -> void; + +#line 165 "reflect.h2" + // Enable custom contracts on this object, integrated with compiler output + // Unlike .requires, a contract violation stops further processing + // + public: auto report_violation(auto const& msg) const& -> void; + +#line 173 "reflect.h2" + public: [[nodiscard]] auto has_handler() const& -> auto; + public: virtual ~compiler_services() noexcept; +public: compiler_services(compiler_services const& that); + +#line 174 "reflect.h2" +}; + +#line 177 "reflect.h2" +/* +//----------------------------------------------------------------------- +// +// Type IDs +// +//----------------------------------------------------------------------- +// + +// All type_ids are wrappers around a pointer to node +// +type_id: @polymorphic_base @copyable type = +{ + this: compiler_services = (); + + n: type_id_node; + + protected operator=: ( + out this, + n_: type_id_node, + s : compiler_services + ) + = { + compiler_services = s; + n = n_; + assert( n, "a 
meta::type_id must point to a valid type_id_node, not null" ); + } + + is_wildcard : (this) -> bool = n.is_wildcard(); + is_pointer_qualified: (this) -> bool = n.is_pointer_qualified(); + template_args_count : (this) -> int = n.template_arguments().ssize(); + to_string : (this) -> std::string = n.to_string(); + + position: (override this) -> source_position = n.position(); +} +*/ + +#line 214 "reflect.h2" +//----------------------------------------------------------------------- +// +// Declarations +// +//----------------------------------------------------------------------- +// + +// All declarations are wrappers around a pointer to node +// +class declaration_base +: public compiler_services { + +#line 227 "reflect.h2" + protected: declaration_node* n; + + protected: explicit declaration_base( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + +#line 240 "reflect.h2" + public: [[nodiscard]] auto position() const -> source_position override; + + public: [[nodiscard]] auto print() const& -> std::string; + public: virtual ~declaration_base() noexcept; +public: declaration_base(declaration_base const& that); + +#line 243 "reflect.h2" +}; + +#line 246 "reflect.h2" +//----------------------------------------------------------------------- +// All declarations +// +class declaration +: public declaration_base { + +#line 253 "reflect.h2" + public: explicit declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + +#line 262 "reflect.h2" + public: [[nodiscard]] auto is_public() const& -> bool; + public: [[nodiscard]] auto is_protected() const& -> bool; + public: [[nodiscard]] auto is_private() const& -> bool; + public: [[nodiscard]] auto is_default_access() const& -> bool; + + public: auto default_to_public() & -> void; + public: auto default_to_protected() & -> void; + public: auto default_to_private() & -> void; + + public: [[nodiscard]] auto make_public() & -> bool; + public: [[nodiscard]] auto make_protected() & -> bool; + public: 
[[nodiscard]] auto make_private() & -> bool; + + public: [[nodiscard]] auto has_name() const& -> bool; + public: [[nodiscard]] auto has_name(cpp2::in<std::string_view> s) const& -> bool; + + public: [[nodiscard]] auto name() const& -> std::string_view; + +#line 283 "reflect.h2" + public: [[nodiscard]] auto has_initializer() const& -> bool; + + public: [[nodiscard]] auto is_global() const& -> bool; + public: [[nodiscard]] auto is_function() const& -> bool; + public: [[nodiscard]] auto is_object() const& -> bool; + public: [[nodiscard]] auto is_base_object() const& -> bool; + public: [[nodiscard]] auto is_member_object() const& -> bool; + public: [[nodiscard]] auto is_type() const& -> bool; + public: [[nodiscard]] auto is_namespace() const& -> bool; + public: [[nodiscard]] auto is_alias() const& -> bool; + + public: [[nodiscard]] auto is_type_alias() const& -> bool; + public: [[nodiscard]] auto is_namespace_alias() const& -> bool; + public: [[nodiscard]] auto is_object_alias() const& -> bool; + + public: [[nodiscard]] auto is_function_expression() const& -> bool; + + public: [[nodiscard]] auto as_function() const& -> function_declaration; + public: [[nodiscard]] auto as_object() const& -> object_declaration; + public: [[nodiscard]] auto as_type() const& -> type_declaration; + public: [[nodiscard]] auto as_alias() const& -> alias_declaration; + + public: [[nodiscard]] auto get_parent() const& -> declaration; + + public: [[nodiscard]] auto parent_is_function() const& -> bool; + public: [[nodiscard]] auto parent_is_object() const& -> bool; + public: [[nodiscard]] auto parent_is_type() const& -> bool; + public: [[nodiscard]] auto parent_is_namespace() const& -> bool; + public: [[nodiscard]] auto parent_is_alias() const& -> bool; + + public: [[nodiscard]] auto parent_is_type_alias() const& -> bool; + public: [[nodiscard]] auto parent_is_namespace_alias() const& -> bool; + public: [[nodiscard]] auto parent_is_object_alias() const& -> bool; + + public: [[nodiscard]] auto 
parent_is_polymorphic() const& -> bool; + + public: auto mark_for_removal_from_enclosing_type() & -> void; + public: virtual ~declaration() noexcept; +public: declaration(declaration const& that); + + // this precondition should be sufficient ... + +#line 325 "reflect.h2" +}; + +#line 328 "reflect.h2" +//----------------------------------------------------------------------- +// Function declarations +// +class function_declaration +: public declaration { + +#line 335 "reflect.h2" + public: explicit function_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + +#line 345 "reflect.h2" + public: [[nodiscard]] auto index_of_parameter_named(cpp2::in<std::string_view> s) const& -> int; + public: [[nodiscard]] auto has_parameter_named(cpp2::in<std::string_view> s) const& -> bool; + public: [[nodiscard]] auto has_in_parameter_named(cpp2::in<std::string_view> s) const& -> bool; + public: [[nodiscard]] auto has_out_parameter_named(cpp2::in<std::string_view> s) const& -> bool; + public: [[nodiscard]] auto has_move_parameter_named(cpp2::in<std::string_view> s) const& -> bool; + public: [[nodiscard]] auto first_parameter_name() const& -> std::string; + + public: [[nodiscard]] auto has_parameter_with_name_and_pass(cpp2::in<std::string_view> s, cpp2::in<passing_style> pass) const& -> bool; + + public: [[nodiscard]] auto is_function_with_this() const& -> bool; + public: [[nodiscard]] auto is_virtual() const& -> bool; + public: [[nodiscard]] auto is_defaultable() const& -> bool; + public: [[nodiscard]] auto is_constructor() const& -> bool; + public: [[nodiscard]] auto is_default_constructor() const& -> bool; + public: [[nodiscard]] auto is_move() const& -> bool; + public: [[nodiscard]] auto is_swap() const& -> bool; + public: [[nodiscard]] auto is_constructor_with_that() const& -> bool; + public: [[nodiscard]] auto is_constructor_with_in_that() const& -> bool; + public: [[nodiscard]] auto is_constructor_with_move_that() const& -> bool; + public: 
[[nodiscard]] auto is_assignment() const& -> bool; + public: [[nodiscard]] auto is_assignment_with_that() const& -> bool; + public: [[nodiscard]] auto is_assignment_with_in_that() const& -> bool; + public: [[nodiscard]] auto is_assignment_with_move_that() const& -> bool; + public: [[nodiscard]] auto is_destructor() const& -> bool; + + public: [[nodiscard]] auto is_copy_or_move() const& -> bool; + + public: [[nodiscard]] auto has_declared_return_type() const& -> bool; + public: [[nodiscard]] auto has_deduced_return_type() const& -> bool; + public: [[nodiscard]] auto has_bool_return_type() const& -> bool; + public: [[nodiscard]] auto has_non_void_return_type() const& -> bool; + + public: [[nodiscard]] auto unnamed_return_type() const& -> std::string; + + public: [[nodiscard]] auto get_parameters() const& -> std::vector<object_declaration>; + +#line 389 "reflect.h2" + public: [[nodiscard]] auto is_binary_comparison_function() const& -> bool; + + public: auto default_to_virtual() & -> void; + + public: [[nodiscard]] auto make_virtual() & -> bool; + + public: auto add_initializer(cpp2::in<std::string_view> source) & -> void; + public: function_declaration(function_declaration const& that); + + +#line 412 "reflect.h2" +}; + +#line 415 "reflect.h2" +//----------------------------------------------------------------------- +// Object declarations +// +class object_declaration +: public declaration { + +#line 422 "reflect.h2" + public: explicit object_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + +#line 432 "reflect.h2" + public: [[nodiscard]] auto is_const() const& -> bool; + public: [[nodiscard]] auto has_wildcard_type() const& -> bool; + + public: [[nodiscard]] auto type() const& -> std::string; + +#line 442 "reflect.h2" + public: [[nodiscard]] auto initializer() const& -> std::string; + public: object_declaration(object_declaration const& that); + + +#line 448 "reflect.h2" +}; + +#line 451 "reflect.h2" 
+//----------------------------------------------------------------------- +// Type declarations +// +class type_declaration +: public declaration { + +#line 458 "reflect.h2" + public: explicit type_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + +#line 468 "reflect.h2" + public: auto reserve_names(cpp2::in<std::string_view> name, auto&& ...etc) const& -> void; + +#line 480 "reflect.h2" + public: [[nodiscard]] auto is_polymorphic() const& -> bool; + public: [[nodiscard]] auto is_final() const& -> bool; + public: [[nodiscard]] auto make_final() & -> bool; + + public: [[nodiscard]] auto get_member_functions() const& -> std::vector<function_declaration>; + +#line 495 "reflect.h2" + public: [[nodiscard]] auto get_member_functions_needing_initializer() const& -> std::vector<function_declaration>; + +#line 510 "reflect.h2" + public: [[nodiscard]] auto get_member_objects() const& -> std::vector<object_declaration>; + +#line 520 "reflect.h2" + public: [[nodiscard]] auto get_member_types() const& -> std::vector<type_declaration>; + +#line 530 "reflect.h2" + public: [[nodiscard]] auto get_member_aliases() const& -> std::vector<alias_declaration>; + +#line 540 "reflect.h2" + public: [[nodiscard]] auto get_members() const& -> std::vector<declaration>; +struct query_declared_value_set_functions_ret { bool out_this_in_that; bool out_this_move_that; bool inout_this_in_that; bool inout_this_move_that; }; + + + +#line 550 "reflect.h2" + public: [[nodiscard]] auto query_declared_value_set_functions() const& -> query_declared_value_set_functions_ret; + +#line 565 "reflect.h2" + public: auto add_member(cpp2::in<std::string_view> source) & -> void; + +#line 579 "reflect.h2" + public: auto remove_marked_members() & -> void; + public: auto remove_all_members() & -> void; + + public: auto disable_member_function_generation() & -> void; + public: type_declaration(type_declaration const& that); + +#line 583 "reflect.h2" +}; + +#line 586 "reflect.h2" 
+//----------------------------------------------------------------------- +// Alias declarations +// +class alias_declaration +: public declaration { + +#line 593 "reflect.h2" + public: explicit alias_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ); + public: alias_declaration(alias_declaration const& that); + + +#line 602 "reflect.h2" +}; + +#line 605 "reflect.h2" +//----------------------------------------------------------------------- +// +// Metafunctions - these are hardwired for now until we get to the +// step of writing a Cpp2 interpreter to run inside the compiler +// +//----------------------------------------------------------------------- +// + +//----------------------------------------------------------------------- +// Some common metafunction helpers (metafunctions are just functions, +// so they can be factored as usual) +// +auto add_virtual_destructor(meta::type_declaration& t) -> void; + +#line 623 "reflect.h2" +//----------------------------------------------------------------------- +// +// "... an abstract base class defines an interface ..." +// +// -- Stroustrup (The Design and Evolution of C++, 12.3.1) +// +//----------------------------------------------------------------------- +// +// interface +// +// an abstract base class having only pure virtual functions +// +auto interface(meta::type_declaration& t) -> void; + +#line 662 "reflect.h2" +//----------------------------------------------------------------------- +// +// "C.35: A base class destructor should be either public and +// virtual, or protected and non-virtual." +// +// "[C.43] ... a base class should not be copyable, and so does not +// necessarily need a default constructor." +// +// -- Stroustrup, Sutter, et al. 
(C++ Core Guidelines) +// +//----------------------------------------------------------------------- +// +// polymorphic_base +// +// A pure polymorphic base type that is not copyable, and whose +// destructor is either public and virtual or protected and nonvirtual. +// +// Unlike an interface, it can have nonpublic and nonvirtual functions. +// +auto polymorphic_base(meta::type_declaration& t) -> void; + +#line 706 "reflect.h2" +//----------------------------------------------------------------------- +// +// "... A totally ordered type ... requires operator<=> that +// returns std::strong_ordering. If the function is not +// user-written, a lexicographical memberwise implementation +// is generated by default..." +// +// -- P0707R4, section 3 +// +// Note: This feature derived from Cpp2 was already adopted +// into Standard C++ via paper P0515, so most of the +// heavy lifting is done by the Cpp1 C++20/23 compiler, +// including the memberwise default semantics +// (In contrast, cppfront has to do the work itself for +// default memberwise semantics for operator= assignment +// as those aren't yet part of Standard C++) +// +//----------------------------------------------------------------------- +// + +auto ordered_impl( + meta::type_declaration& t, + cpp2::in<std::string_view> ordering// must be "strong_ordering" etc. 
+) -> void; + +#line 750 "reflect.h2" +//----------------------------------------------------------------------- +// ordered - a totally ordered type +// +// Note: the ordering that should be encouraged as default gets the nice name +// +auto ordered(meta::type_declaration& t) -> void; + +#line 760 "reflect.h2" +//----------------------------------------------------------------------- +// weakly_ordered - a weakly ordered type +// +auto weakly_ordered(meta::type_declaration& t) -> void; + +#line 768 "reflect.h2" +//----------------------------------------------------------------------- +// partially_ordered - a partially ordered type +// +auto partially_ordered(meta::type_declaration& t) -> void; + +#line 777 "reflect.h2" +//----------------------------------------------------------------------- +// +// "A value is ... a regular type. It must have all public +// default construction, copy/move construction/assignment, +// and destruction, all of which are generated by default +// if not user-written; and it must not have any protected +// or virtual functions (including the destructor)." +// +// -- P0707R4, section 3 +// +//----------------------------------------------------------------------- +// +// copyable +// +// A type with (copy and move) x (construction and assignment) +// +auto copyable(meta::type_declaration& t) -> void; + +#line 814 "reflect.h2" +//----------------------------------------------------------------------- +// +// basic_value +// +// A regular type: copyable, plus has public default construction +// and no protected or virtual functions +// +auto basic_value(meta::type_declaration& t) -> void; + +#line 839 "reflect.h2" +//----------------------------------------------------------------------- +// +// "A 'value' is a totally ordered basic_value..." 
+// +// -- P0707R4, section 3 +// +// value - a value type that is totally ordered +// +// Note: the ordering that should be encouraged as default gets the nice name +// +auto value(meta::type_declaration& t) -> void; + +#line 855 "reflect.h2" +auto weakly_ordered_value(meta::type_declaration& t) -> void; + +#line 861 "reflect.h2" +auto partially_ordered_value(meta::type_declaration& t) -> void; + +#line 868 "reflect.h2" +//----------------------------------------------------------------------- +// +// "By definition, a `struct` is a `class` in which members +// are by default `public`; that is, +// +// struct s { ... +// +// is simply shorthand for +// +// class s { public: ... +// +// ... Which style you use depends on circumstances and taste. +// I usually prefer to use `struct` for classes that have all +// data `public`." +// +// -- Stroustrup (The C++ Programming Language, 3rd ed., p. 234) +// +//----------------------------------------------------------------------- +// +// struct +// +// a type with only public bases, objects, and functions, +// no virtual functions, and no user-defined constructors +// (i.e., no invariants) or assignment or destructors. +// +auto cpp2_struct(meta::type_declaration& t) -> void; + +#line 911 "reflect.h2" +//----------------------------------------------------------------------- +// +// "C enumerations constitute a curiously half-baked concept. ... +// the cleanest way out was to deem each enumeration a separate type." +// +// -- Stroustrup (The Design and Evolution of C++, 11.7) +// +// "An enumeration is a distinct type ... 
with named constants" +// +// -- ISO C++ Standard +// +//----------------------------------------------------------------------- +// +// basic_enum +// +// a type together with named constants that are its possible values +// +class value_member_info { + public: std::string name; + public: std::string type; + public: std::string value; +}; + +auto basic_enum( + meta::type_declaration& t, + auto const& nextval, + cpp2::in<bool> bitwise + ) -> void; + +#line 1117 "reflect.h2" +//----------------------------------------------------------------------- +// +// "An enum[...] is a totally ordered value type that stores a +// value of its enumerators's type, and otherwise has only public +// member variables of its enumerator's type, all of which are +// naturally scoped because they are members of a type." +// +// -- P0707R4, section 3 +// +auto cpp2_enum(meta::type_declaration& t) -> void; + +#line 1143 "reflect.h2" +//----------------------------------------------------------------------- +// +// "flag_enum expresses an enumeration that stores values +// corresponding to bitwise-or'd enumerators. The enumerators must +// be powers of two, and are automatically generated [...] A none +// value is provided [...] Operators | and & are provided to +// combine and extract values." +// +// -- P0707R4, section 3 +// +auto flag_enum(meta::type_declaration& t) -> void; + +#line 1175 "reflect.h2" +//----------------------------------------------------------------------- +// +// "As with void*, programmers should know that unions [...] are +// inherently dangerous, should be avoided wherever possible, +// and should be handled with special care when actually needed." +// +// -- Stroustrup (The Design and Evolution of C++, 14.3.4.1) +// +// "C++17 needs a type-safe union... 
The implications of the +// consensus `variant` design are well understood and have been +// explored over several LEWG discussions, over a thousand emails, +// a joint LEWG/EWG session, and not to mention 12 years of +// experience with Boost and other libraries." +// +// -- Axel Naumann, in P0088 (wg21.link/p0088), +// the adopted proposal for C++17 std::variant +// +//----------------------------------------------------------------------- +// +// union +// +// a type that contains exactly one of a fixed set of values at a time +// + +auto cpp2_union(meta::type_declaration& t) -> void; + +#line 1331 "reflect.h2" +//----------------------------------------------------------------------- +// +// print - output a pretty-printed visualization of t +// +auto print(cpp2::in<meta::type_declaration> t) -> void; + +#line 1341 "reflect.h2" +//----------------------------------------------------------------------- +// +// apply_metafunctions +// +[[nodiscard]] auto apply_metafunctions( + declaration_node& n, + type_declaration& rtype, + auto const& error + ) -> bool; + +#line 1445 "reflect.h2" +} + +} + + +//=== Cpp2 function definitions ================================================= + +#line 1 "reflect.h2" + +#line 20 "reflect.h2" +namespace cpp2 { + +namespace meta { + +#line 46 "reflect.h2" + compiler_services::compiler_services( + + std::vector<error_entry>* errors_, + std::deque<token>* generated_tokens_ + ) + : errors{ errors_ } + , errors_original_size{ cpp2::unsafe_narrow<int>(std::ssize(*cpp2::assert_not_null(errors))) } + , generated_tokens{ generated_tokens_ } + , parser{ *cpp2::assert_not_null(errors) } +#line 51 "reflect.h2" + { + +#line 56 "reflect.h2" + } + +#line 60 "reflect.h2" + auto compiler_services::set_metafunction_name(cpp2::in<std::string_view> name, cpp2::in<std::vector<std::string>> args) & -> void{ + metafunction_name = name; + metafunction_args = args; + metafunctions_used = CPP2_UFCS(empty)(args); + } + + [[nodiscard]] auto 
compiler_services::get_metafunction_name() const& -> std::string_view { return metafunction_name; } + + [[nodiscard]] auto compiler_services::get_argument(cpp2::in<int> index) & -> std::string{ + metafunctions_used = true; + if (([_0 = 0, _1 = index, _2 = CPP2_UFCS(ssize)(metafunction_args)]{ return cpp2::cmp_less_eq(_0,_1) && cpp2::cmp_less(_1,_2); }())) { + return CPP2_ASSERT_IN_BOUNDS(metafunction_args, index); + } + return ""; + } + + [[nodiscard]] auto compiler_services::get_arguments() & -> std::vector<std::string>{ + metafunctions_used = true; + return metafunction_args; + } + + [[nodiscard]] auto compiler_services::arguments_were_used() const& -> bool { return metafunctions_used; } + + [[nodiscard]] auto compiler_services::parse_statement( + + std::string_view source + ) & -> parse_statement_ret + + { + cpp2::deferred_init<std::unique_ptr<statement_node>> ret; +#line 89 "reflect.h2" + auto original_source {source}; + + CPP2_UFCS(push_back)(generated_lines, std::vector<source_line>()); + auto lines {&CPP2_UFCS(back)(generated_lines)}; + + auto add_line {[&, _1 = lines](cpp2::in<std::string_view> s) mutable -> void{ + static_cast<void>(CPP2_UFCS(emplace_back)((*cpp2::assert_not_null(_1)), s, source_line::category::cpp2)); + }}; +{ +auto newline_pos = CPP2_UFCS(find)(source, '\n'); + + // First split this string into source_lines + // + +#line 101 "reflect.h2" + if ( cpp2::cmp_greater(CPP2_UFCS(ssize)(source),1) + && newline_pos != source.npos) + { + while( newline_pos != std::string_view::npos ) + { + add_line(CPP2_UFCS(substr)(source, 0, newline_pos)); + CPP2_UFCS(remove_prefix)(source, newline_pos + 1); + newline_pos = CPP2_UFCS(find)(source, '\n'); + } + } +} + +#line 112 "reflect.h2" + if (!(CPP2_UFCS(empty)(source))) { + std::move(add_line)(std::move(source)); + } + + // Now lex this source fragment to generate + // a single grammar_map entry, whose .second + // is the vector of tokens + static_cast<void>(CPP2_UFCS(emplace_back)(generated_lexers, 
*cpp2::assert_not_null(errors))); + auto tokens {&CPP2_UFCS(back)(generated_lexers)}; + CPP2_UFCS(lex)((*cpp2::assert_not_null(tokens)), *cpp2::assert_not_null(std::move(lines)), true); + + if (cpp2::Default.has_handler() && !(std::ssize(CPP2_UFCS(get_map)((*cpp2::assert_not_null(tokens)))) == 1) ) { cpp2::Default.report_violation(""); } + + // Now parse this single declaration from + // the lexed tokens + ret.construct(CPP2_UFCS(parse_one_declaration)(parser, + (*cpp2::assert_not_null(CPP2_UFCS(begin)(CPP2_UFCS(get_map)(*cpp2::assert_not_null(std::move(tokens)))))).second, + *cpp2::assert_not_null(generated_tokens) + )); + if (!(ret.value())) { + error("parse failed - the source string is not a valid statement:\n" + cpp2::to_string(std::move(original_source))); + }return std::move(ret.value()); + } + + [[nodiscard]] auto compiler_services::position() const -> source_position + + { + return { }; + } + +#line 145 "reflect.h2" + auto compiler_services::require( + + cpp2::in<bool> b, + cpp2::in<std::string_view> msg + ) const& -> void + { + if (!(b)) { + error(msg); + } + } + + auto compiler_services::error(cpp2::in<std::string_view> msg) const& -> void + { + auto message {cpp2::as_<std::string>(msg)}; + if (!(CPP2_UFCS(empty)(metafunction_name))) { + message = "while applying @" + cpp2::to_string(metafunction_name) + " - " + cpp2::to_string(message); + } + static_cast<void>(CPP2_UFCS(emplace_back)((*cpp2::assert_not_null(errors)), position(), std::move(message))); + } + +#line 168 "reflect.h2" + auto compiler_services::report_violation(auto const& msg) const& -> void{ + error(msg); + throw(std::runtime_error(" ==> programming bug found in metafunction @" + cpp2::to_string(metafunction_name) + " - contract violation - see previous errors")); + } + + [[nodiscard]] auto compiler_services::has_handler() const& -> auto { return true; } + + compiler_services::~compiler_services() noexcept{} +compiler_services::compiler_services(compiler_services const& that) + : errors{ 
that.errors } + , errors_original_size{ that.errors_original_size } + , generated_tokens{ that.generated_tokens } + , parser{ that.parser } + , metafunction_name{ that.metafunction_name } + , metafunction_args{ that.metafunction_args } + , metafunctions_used{ that.metafunctions_used }{} + +#line 229 "reflect.h2" + declaration_base::declaration_base( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : compiler_services{ s } + , n{ n_ } +#line 234 "reflect.h2" + { + +#line 237 "reflect.h2" + if (cpp2::Default.has_handler() && !(n) ) { cpp2::Default.report_violation(CPP2_CONTRACT_MSG("a meta::declaration must point to a valid declaration_node, not null")); } + } + + [[nodiscard]] auto declaration_base::position() const -> source_position { return CPP2_UFCS(position)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration_base::print() const& -> std::string { return CPP2_UFCS(pretty_print_visualize)((*cpp2::assert_not_null(n)), 0); } + + declaration_base::~declaration_base() noexcept{} +declaration_base::declaration_base(declaration_base const& that) + : compiler_services{ static_cast<compiler_services const&>(that) } + , n{ that.n }{} + +#line 253 "reflect.h2" + declaration::declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : declaration_base{ n_, s } +#line 258 "reflect.h2" + { + + } + + [[nodiscard]] auto declaration::is_public() const& -> bool { return CPP2_UFCS(is_public)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_protected() const& -> bool { return CPP2_UFCS(is_protected)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_private() const& -> bool { return CPP2_UFCS(is_private)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_default_access() const& -> bool { return CPP2_UFCS(is_default_access)((*cpp2::assert_not_null(n))); } + + auto declaration::default_to_public() & -> void { static_cast<void>(CPP2_UFCS(make_public)((*cpp2::assert_not_null(n)))); 
} + auto declaration::default_to_protected() & -> void { static_cast<void>(CPP2_UFCS(make_protected)((*cpp2::assert_not_null(n)))); } + auto declaration::default_to_private() & -> void { static_cast<void>(CPP2_UFCS(make_private)((*cpp2::assert_not_null(n)))); } + + [[nodiscard]] auto declaration::make_public() & -> bool { return CPP2_UFCS(make_public)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::make_protected() & -> bool { return CPP2_UFCS(make_protected)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::make_private() & -> bool { return CPP2_UFCS(make_private)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::has_name() const& -> bool { return CPP2_UFCS(has_name)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::has_name(cpp2::in<std::string_view> s) const& -> bool { return CPP2_UFCS(has_name)((*cpp2::assert_not_null(n)), s); } + + [[nodiscard]] auto declaration::name() const& -> std::string_view{ + if (has_name()) {return CPP2_UFCS(as_string_view)((*cpp2::assert_not_null(CPP2_UFCS(name)(*cpp2::assert_not_null(n))))); } + else { return ""; } + } + + [[nodiscard]] auto declaration::has_initializer() const& -> bool { return CPP2_UFCS(has_initializer)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::is_global() const& -> bool { return CPP2_UFCS(is_global)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_function() const& -> bool { return CPP2_UFCS(is_function)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_object() const& -> bool { return CPP2_UFCS(is_object)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_base_object() const& -> bool { return CPP2_UFCS(is_base_object)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_member_object() const& -> bool { return CPP2_UFCS(is_member_object)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_type() const& -> bool { return 
CPP2_UFCS(is_type)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_namespace() const& -> bool { return CPP2_UFCS(is_namespace)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_alias() const& -> bool { return CPP2_UFCS(is_alias)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::is_type_alias() const& -> bool { return CPP2_UFCS(is_type_alias)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_namespace_alias() const& -> bool { return CPP2_UFCS(is_namespace_alias)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::is_object_alias() const& -> bool { return CPP2_UFCS(is_object_alias)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::is_function_expression() const& -> bool { return CPP2_UFCS(is_function_expression)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::as_function() const& -> function_declaration { return function_declaration(n, (*this)); } + [[nodiscard]] auto declaration::as_object() const& -> object_declaration { return object_declaration(n, (*this)); } + [[nodiscard]] auto declaration::as_type() const& -> type_declaration { return type_declaration(n, (*this)); } + [[nodiscard]] auto declaration::as_alias() const& -> alias_declaration { return alias_declaration(n, (*this)); } + + [[nodiscard]] auto declaration::get_parent() const& -> declaration { return declaration((*cpp2::assert_not_null(n)).parent_declaration, (*this)); } + + [[nodiscard]] auto declaration::parent_is_function() const& -> bool { return CPP2_UFCS(parent_is_function)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_object() const& -> bool { return CPP2_UFCS(parent_is_object)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_type() const& -> bool { return CPP2_UFCS(parent_is_type)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_namespace() const& -> bool { return 
CPP2_UFCS(parent_is_namespace)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_alias() const& -> bool { return CPP2_UFCS(parent_is_alias)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::parent_is_type_alias() const& -> bool { return CPP2_UFCS(parent_is_type_alias)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_namespace_alias() const& -> bool { return CPP2_UFCS(parent_is_namespace_alias)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto declaration::parent_is_object_alias() const& -> bool { return CPP2_UFCS(parent_is_object_alias)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto declaration::parent_is_polymorphic() const& -> bool { return CPP2_UFCS(parent_is_polymorphic)((*cpp2::assert_not_null(n))); } + + auto declaration::mark_for_removal_from_enclosing_type() & -> void + + { + if (cpp2::Type.has_handler() && !(parent_is_type()) ) { cpp2::Type.report_violation(""); } +#line 322 "reflect.h2" + auto test {CPP2_UFCS(type_member_mark_for_removal)((*cpp2::assert_not_null(n)))}; + if (cpp2::Default.has_handler() && !(std::move(test)) ) { cpp2::Default.report_violation(""); }// ... 
to ensure this assert is true + } + + declaration::~declaration() noexcept{} +declaration::declaration(declaration const& that) + : declaration_base{ static_cast<declaration_base const&>(that) }{} + +#line 335 "reflect.h2" + function_declaration::function_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : declaration{ n_, s } +#line 340 "reflect.h2" + { + + if (cpp2::Default.has_handler() && !(CPP2_UFCS(is_function)((*cpp2::assert_not_null(n)))) ) { cpp2::Default.report_violation(""); } + } + + [[nodiscard]] auto function_declaration::index_of_parameter_named(cpp2::in<std::string_view> s) const& -> int { return CPP2_UFCS(index_of_parameter_named)((*cpp2::assert_not_null(n)), s); } + [[nodiscard]] auto function_declaration::has_parameter_named(cpp2::in<std::string_view> s) const& -> bool { return CPP2_UFCS(has_parameter_named)((*cpp2::assert_not_null(n)), s); } + [[nodiscard]] auto function_declaration::has_in_parameter_named(cpp2::in<std::string_view> s) const& -> bool { return CPP2_UFCS(has_in_parameter_named)((*cpp2::assert_not_null(n)), s); } + [[nodiscard]] auto function_declaration::has_out_parameter_named(cpp2::in<std::string_view> s) const& -> bool { return CPP2_UFCS(has_out_parameter_named)((*cpp2::assert_not_null(n)), s); } + [[nodiscard]] auto function_declaration::has_move_parameter_named(cpp2::in<std::string_view> s) const& -> bool { return CPP2_UFCS(has_move_parameter_named)((*cpp2::assert_not_null(n)), s); } + [[nodiscard]] auto function_declaration::first_parameter_name() const& -> std::string { return CPP2_UFCS(first_parameter_name)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto function_declaration::has_parameter_with_name_and_pass(cpp2::in<std::string_view> s, cpp2::in<passing_style> pass) const& -> bool { + return CPP2_UFCS(has_parameter_with_name_and_pass)((*cpp2::assert_not_null(n)), s, pass); } + [[nodiscard]] auto function_declaration::is_function_with_this() const& -> bool { return 
CPP2_UFCS(is_function_with_this)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_virtual() const& -> bool { return CPP2_UFCS(is_virtual_function)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_defaultable() const& -> bool { return CPP2_UFCS(is_defaultable_function)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_constructor() const& -> bool { return CPP2_UFCS(is_constructor)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_default_constructor() const& -> bool { return CPP2_UFCS(is_default_constructor)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_move() const& -> bool { return CPP2_UFCS(is_move)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_swap() const& -> bool { return CPP2_UFCS(is_swap)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_constructor_with_that() const& -> bool { return CPP2_UFCS(is_constructor_with_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_constructor_with_in_that() const& -> bool { return CPP2_UFCS(is_constructor_with_in_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_constructor_with_move_that() const& -> bool { return CPP2_UFCS(is_constructor_with_move_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_assignment() const& -> bool { return CPP2_UFCS(is_assignment)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_assignment_with_that() const& -> bool { return CPP2_UFCS(is_assignment_with_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_assignment_with_in_that() const& -> bool { return CPP2_UFCS(is_assignment_with_in_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_assignment_with_move_that() const& -> bool { return 
CPP2_UFCS(is_assignment_with_move_that)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::is_destructor() const& -> bool { return CPP2_UFCS(is_destructor)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto function_declaration::is_copy_or_move() const& -> bool { return is_constructor_with_that() || is_assignment_with_that(); } + + [[nodiscard]] auto function_declaration::has_declared_return_type() const& -> bool { return CPP2_UFCS(has_declared_return_type)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::has_deduced_return_type() const& -> bool { return CPP2_UFCS(has_deduced_return_type)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::has_bool_return_type() const& -> bool { return CPP2_UFCS(has_bool_return_type)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto function_declaration::has_non_void_return_type() const& -> bool { return CPP2_UFCS(has_non_void_return_type)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto function_declaration::unnamed_return_type() const& -> std::string { return CPP2_UFCS(unnamed_return_type_to_string)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto function_declaration::get_parameters() const& -> std::vector<object_declaration> + + { + std::vector<object_declaration> ret {}; + for ( auto const& param : CPP2_UFCS(get_function_parameters)((*cpp2::assert_not_null(n))) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, &*cpp2::assert_not_null((*cpp2::assert_not_null(param)).declaration), (*this))); + } + return ret; + } + + [[nodiscard]] auto function_declaration::is_binary_comparison_function() const& -> bool { return CPP2_UFCS(is_binary_comparison_function)((*cpp2::assert_not_null(n))); } + + auto function_declaration::default_to_virtual() & -> void { static_cast<void>(CPP2_UFCS(make_function_virtual)((*cpp2::assert_not_null(n)))); } + + [[nodiscard]] auto function_declaration::make_virtual() & -> bool { return 
CPP2_UFCS(make_function_virtual)((*cpp2::assert_not_null(n))); } + + auto function_declaration::add_initializer(cpp2::in<std::string_view> source) & -> void + +#line 398 "reflect.h2" + { + if ((*this).has_handler() && !(!(has_initializer())) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that already has one")); } + if ((*this).has_handler() && !(parent_is_type()) ) { (*this).report_violation(CPP2_CONTRACT_MSG("cannot add an initializer to a function that isn't in a type scope")); } + //require( !has_initializer(), + // "cannot add an initializer to a function that already has one"); + //require( parent_is_type(), + // "cannot add an initializer to a function that isn't in a type scope"); + +#line 404 "reflect.h2" + auto stmt {parse_statement(source)}; + if (!((cpp2::as_<bool>(stmt)))) { + error("cannot add an initializer that is not a valid statement"); + return ; + } + require(CPP2_UFCS(add_function_initializer)((*cpp2::assert_not_null(n)), std::move(stmt)), + std::string("unexpected error while attempting to add initializer")); + } + + function_declaration::function_declaration(function_declaration const& that) + : declaration{ static_cast<declaration const&>(that) }{} + +#line 422 "reflect.h2" + object_declaration::object_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : declaration{ n_, s } +#line 427 "reflect.h2" + { + + if (cpp2::Default.has_handler() && !(CPP2_UFCS(is_object)((*cpp2::assert_not_null(n)))) ) { cpp2::Default.report_violation(""); } + } + + [[nodiscard]] auto object_declaration::is_const() const& -> bool { return CPP2_UFCS(is_const)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto object_declaration::has_wildcard_type() const& -> bool { return CPP2_UFCS(has_wildcard_type)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto object_declaration::type() const& -> std::string{ + auto ret {CPP2_UFCS(object_type)((*cpp2::assert_not_null(n)))}; + require(!(contains(ret, 
"(*ERROR*)")), + "cannot to_string this type: " + ret); + return ret; + } + + [[nodiscard]] auto object_declaration::initializer() const& -> std::string{ + auto ret {CPP2_UFCS(object_initializer)((*cpp2::assert_not_null(n)))}; + require(!(contains(ret, "(*ERROR*)")), + "cannot to_string this initializer: " + ret); + return ret; + } + + object_declaration::object_declaration(object_declaration const& that) + : declaration{ static_cast<declaration const&>(that) }{} + +#line 458 "reflect.h2" + type_declaration::type_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : declaration{ n_, s } +#line 463 "reflect.h2" + { + + if (cpp2::Default.has_handler() && !(CPP2_UFCS(is_type)((*cpp2::assert_not_null(n)))) ) { cpp2::Default.report_violation(""); } + } + + auto type_declaration::reserve_names(cpp2::in<std::string_view> name, auto&& ...etc) const& -> void + { // etc is not declared ':string_view' for compatibility with GCC 10.x + for ( + auto const& m : get_members() ) { + CPP2_UFCS(require)(m, !(CPP2_UFCS(has_name)(m, name)), + "in a '" + cpp2::to_string(get_metafunction_name()) + "' type, the name '" + cpp2::to_string(name) + "' is reserved for use by the '" + cpp2::to_string(get_metafunction_name()) + "' implementation"); + } + if constexpr (!(CPP2_PACK_EMPTY(etc))) { + reserve_names(CPP2_FORWARD(etc)...); + } + } + + [[nodiscard]] auto type_declaration::is_polymorphic() const& -> bool { return CPP2_UFCS(is_polymorphic)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto type_declaration::is_final() const& -> bool { return CPP2_UFCS(is_type_final)((*cpp2::assert_not_null(n))); } + [[nodiscard]] auto type_declaration::make_final() & -> bool { return CPP2_UFCS(make_type_final)((*cpp2::assert_not_null(n))); } + + [[nodiscard]] auto type_declaration::get_member_functions() const& -> std::vector<function_declaration> + + { + std::vector<function_declaration> ret {}; + for ( + auto const& d : 
CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::functions) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::get_member_functions_needing_initializer() const& -> std::vector<function_declaration> + + { + std::vector<function_declaration> ret {}; + for ( + auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::functions) ) + if ( !(CPP2_UFCS(has_initializer)((*cpp2::assert_not_null(d)))) + && !(CPP2_UFCS(is_virtual_function)((*cpp2::assert_not_null(d)))) + && !(CPP2_UFCS(is_defaultable_function)((*cpp2::assert_not_null(d))))) + { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::get_member_objects() const& -> std::vector<object_declaration> + + { + std::vector<object_declaration> ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::objects) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::get_member_types() const& -> std::vector<type_declaration> + + { + std::vector<type_declaration> ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::types) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::get_member_aliases() const& -> std::vector<alias_declaration> + + { + std::vector<alias_declaration> ret {}; + for ( auto const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::aliases) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::get_members() const& -> std::vector<declaration> + + { + std::vector<declaration> ret {}; + for ( auto 
const& d : CPP2_UFCS(get_type_scope_declarations)((*cpp2::assert_not_null(n)), declaration_node::all) ) { + static_cast<void>(CPP2_UFCS(emplace_back)(ret, d, (*this))); + } + return ret; + } + + [[nodiscard]] auto type_declaration::query_declared_value_set_functions() const& -> query_declared_value_set_functions_ret + +#line 557 "reflect.h2" + { + cpp2::deferred_init<bool> out_this_in_that; + cpp2::deferred_init<bool> out_this_move_that; + cpp2::deferred_init<bool> inout_this_in_that; + cpp2::deferred_init<bool> inout_this_move_that; +#line 558 "reflect.h2" + auto declared {CPP2_UFCS(find_declared_value_set_functions)((*cpp2::assert_not_null(n)))}; + out_this_in_that.construct(declared.out_this_in_that != nullptr); + out_this_move_that.construct(declared.out_this_move_that != nullptr); + inout_this_in_that.construct(declared.inout_this_in_that != nullptr); + inout_this_move_that.construct(std::move(declared).inout_this_move_that != nullptr); + return { std::move(out_this_in_that.value()), std::move(out_this_move_that.value()), std::move(inout_this_in_that.value()), std::move(inout_this_move_that.value()) }; } + + auto type_declaration::add_member(cpp2::in<std::string_view> source) & -> void + { + auto decl {parse_statement(source)}; + if (!((cpp2::as_<bool>(decl)))) { + error("the provided source string is not a valid statement"); + return ; + } + if (!(CPP2_UFCS(is_declaration)((*cpp2::assert_not_null(decl))))) { + error("cannot add a member that is not a declaration"); + } + require(CPP2_UFCS(add_type_member)((*cpp2::assert_not_null(n)), std::move(decl)), + std::string("unexpected error while attempting to add member:\n") + source); + } + + auto type_declaration::remove_marked_members() & -> void { CPP2_UFCS(type_remove_marked_members)((*cpp2::assert_not_null(n))); } + auto type_declaration::remove_all_members() & -> void { CPP2_UFCS(type_remove_all_members)((*cpp2::assert_not_null(n))); } + + auto type_declaration::disable_member_function_generation() & -> void 
{ CPP2_UFCS(type_disable_member_function_generation)((*cpp2::assert_not_null(n))); } + + type_declaration::type_declaration(type_declaration const& that) + : declaration{ static_cast<declaration const&>(that) }{} + +#line 593 "reflect.h2" + alias_declaration::alias_declaration( + + declaration_node* n_, + cpp2::in<compiler_services> s + ) + : declaration{ n_, s } +#line 598 "reflect.h2" + { + + if (cpp2::Default.has_handler() && !(CPP2_UFCS(is_alias)((*cpp2::assert_not_null(n)))) ) { cpp2::Default.report_violation(""); } + } + + alias_declaration::alias_declaration(alias_declaration const& that) + : declaration{ static_cast<declaration const&>(that) }{} + +#line 617 "reflect.h2" +auto add_virtual_destructor(meta::type_declaration& t) -> void +{ + CPP2_UFCS(add_member)(t, "operator=: (virtual move this) = { }"); +} + +#line 635 "reflect.h2" +auto interface(meta::type_declaration& t) -> void +{ + auto has_dtor {false}; + + for ( auto& m : CPP2_UFCS(get_members)(t) ) + { + CPP2_UFCS(require)(m, !(CPP2_UFCS(is_object)(m)), + "interfaces may not contain data objects"); + if (CPP2_UFCS(is_function)(m)) { + auto mf {CPP2_UFCS(as_function)(m)}; + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_copy_or_move)(mf)), + "interfaces may not copy or move; consider a virtual clone() instead"); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(has_initializer)(mf)), + "interface functions must not have a function body; remove the '=' initializer"); + CPP2_UFCS(require)(mf, CPP2_UFCS(make_public)(mf), + "interface functions must be public"); + CPP2_UFCS(default_to_virtual)(mf); + has_dtor |= CPP2_UFCS(is_destructor)(mf); + } + } + + if (!(std::move(has_dtor))) { + CPP2_UFCS(add_virtual_destructor)(t); + } +} + +#line 681 "reflect.h2" +auto polymorphic_base(meta::type_declaration& t) -> void +{ + auto has_dtor {false}; + + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) + { + if (CPP2_UFCS(is_default_access)(mf)) { + CPP2_UFCS(default_to_public)(mf); + } + CPP2_UFCS(require)(mf, 
!(CPP2_UFCS(is_copy_or_move)(mf)), + "polymorphic base types may not copy or move; consider a virtual clone() instead"); + if (CPP2_UFCS(is_destructor)(mf)) { + has_dtor = true; + CPP2_UFCS(require)(mf, ((CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf)) && CPP2_UFCS(is_virtual)(mf)) + || (CPP2_UFCS(is_protected)(mf) && !(CPP2_UFCS(is_virtual)(mf))), + "a polymorphic base type destructor must be public and virtual, or protected and nonvirtual"); + } + } + + if (!(std::move(has_dtor))) { + CPP2_UFCS(add_virtual_destructor)(t); + } +} + +#line 726 "reflect.h2" +auto ordered_impl( + meta::type_declaration& t, + cpp2::in<std::string_view> ordering +) -> void +{ + auto has_spaceship {false}; + + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) + { + if (CPP2_UFCS(has_name)(mf, "operator<=>")) { + has_spaceship = true; + auto return_name {CPP2_UFCS(unnamed_return_type)(mf)}; + if (CPP2_UFCS(find)(return_name, ordering) == return_name.npos) + { + CPP2_UFCS(error)(mf, "operator<=> must return std::" + cpp2::as_<std::string>(ordering)); + } + } + } + + if (!(std::move(has_spaceship))) { + CPP2_UFCS(add_member)(t, "operator<=>: (this, that) -> std::" + (cpp2::as_<std::string>(ordering)) + ";"); + } +} + +#line 755 "reflect.h2" +auto ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, "strong_ordering"); +} + +#line 763 "reflect.h2" +auto weakly_ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, "weak_ordering"); +} + +#line 771 "reflect.h2" +auto partially_ordered(meta::type_declaration& t) -> void +{ + ordered_impl(t, "partial_ordering"); +} + +#line 793 "reflect.h2" +auto copyable(meta::type_declaration& t) -> void +{ + // If the user explicitly wrote any of the copy/move functions, + // they must also have written the most general one - we can't + // assume we can safely generate it for them since they've opted + // into customized semantics + auto smfs {CPP2_UFCS(query_declared_value_set_functions)(t)}; + if ( 
!(smfs.out_this_in_that) + && ( + smfs.out_this_move_that + || smfs.inout_this_in_that + || smfs.inout_this_move_that)) + + { + CPP2_UFCS(error)(t, "this type is partially copyable/movable - when you provide any of the more-specific operator= signatures, you must also provide the one with the general signature (out this, that); alternatively, consider removing all the operator= functions and let them all be generated for you with default memberwise semantics"); + } + else {if (!(std::move(smfs).out_this_in_that)) { + CPP2_UFCS(add_member)(t, "operator=: (out this, that) = { }"); + }} +} + +#line 821 "reflect.h2" +auto basic_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(copyable)(t); + + auto has_default_ctor {false}; + for ( auto& mf : CPP2_UFCS(get_member_functions)(t) ) { + has_default_ctor |= CPP2_UFCS(is_default_constructor)(mf); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_protected)(mf)) && !(CPP2_UFCS(is_virtual)(mf)), + "a value type may not have a protected or virtual function"); + CPP2_UFCS(require)(mf, !(CPP2_UFCS(is_destructor)(mf)) || CPP2_UFCS(is_public)(mf) || CPP2_UFCS(is_default_access)(mf), + "a value type may not have a non-public destructor"); + } + + if (!(std::move(has_default_ctor))) { + CPP2_UFCS(add_member)(t, "operator=: (out this) = { }"); + } +} + +#line 849 "reflect.h2" +auto value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +auto weakly_ordered_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(weakly_ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +auto partially_ordered_value(meta::type_declaration& t) -> void +{ + CPP2_UFCS(partially_ordered)(t); + CPP2_UFCS(basic_value)(t); +} + +#line 893 "reflect.h2" +auto cpp2_struct(meta::type_declaration& t) -> void +{ + for ( auto& m : CPP2_UFCS(get_members)(t) ) + { + CPP2_UFCS(require)(m, CPP2_UFCS(make_public)(m), + "all struct members must be public"); + if (CPP2_UFCS(is_function)(m)) { + auto mf {CPP2_UFCS(as_function)(m)}; + 
CPP2_UFCS(require)(t, !(CPP2_UFCS(is_virtual)(mf)), + "a struct may not have a virtual function"); + CPP2_UFCS(require)(t, !(CPP2_UFCS(has_name)(mf, "operator=")), + "a struct may not have a user-defined operator="); + } + } + CPP2_UFCS(disable_member_function_generation)(t); +} + +#line 934 "reflect.h2" +auto basic_enum( + meta::type_declaration& t, + auto const& nextval, + cpp2::in<bool> bitwise + ) -> void +{ + std::vector<value_member_info> enumerators {}; + cpp2::i64 min_value {}; + cpp2::i64 max_value {}; + cpp2::deferred_init<std::string> underlying_type; + + CPP2_UFCS(reserve_names)(t, "operator=", "operator<=>"); + if (bitwise) { + CPP2_UFCS(reserve_names)(t, "has", "set", "clear", "to_string", "get_raw_value", "none"); + } + + // 1. Gather: The names of all the user-written members, and find/compute the type + + underlying_type.construct(CPP2_UFCS(get_argument)(t, 0));// use the first template argument, if there was one + + auto found_non_numeric {false}; +{ +std::string value = "-1"; + +#line 957 "reflect.h2" + for ( + auto const& m : CPP2_UFCS(get_members)(t) ) + if ( CPP2_UFCS(is_member_object)(m)) + { + CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), + "an enumerator cannot be protected or private"); + + auto mo {CPP2_UFCS(as_object)(m)}; + if (!(CPP2_UFCS(has_wildcard_type)(mo))) { + CPP2_UFCS(error)(mo, "an explicit underlying type should be specified as a template argument to the metafunction - try 'enum<u16>' or 'flag_enum<u64>'"); + } + + auto init {CPP2_UFCS(initializer)(mo)}; + + auto is_default_or_numeric {is_empty_or_a_decimal_number(init)}; + found_non_numeric |= !(CPP2_UFCS(empty)(init)) && !(is_default_or_numeric); + CPP2_UFCS(require)(m, !(is_default_or_numeric) || !(found_non_numeric) || CPP2_UFCS(has_name)(mo, "none"), + cpp2::to_string(CPP2_UFCS(name)(mo)) + ": enumerators with non-numeric values must come after all default and numeric values"); + + nextval(value, init); + + auto v 
{std::strtoll(&CPP2_ASSERT_IN_BOUNDS(value, 0), nullptr, 10)}; // for non-numeric values we'll just get 0 which is okay for now + if (cpp2::cmp_less(v,min_value)) { + min_value = v; + } + if (cpp2::cmp_greater(v,max_value)) { + max_value = v; + } + + // Adding local variable 'e' to work around a Clang warning + value_member_info e {cpp2::as_<std::string>(CPP2_UFCS(name)(mo)), "", value}; + CPP2_UFCS(push_back)(enumerators, e); + + CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); + } +} + +#line 993 "reflect.h2" + if ((CPP2_UFCS(empty)(enumerators))) { + CPP2_UFCS(error)(t, "an enumeration must contain at least one enumerator value"); + return ; + } + + // Compute the default underlying type, if it wasn't explicitly specified + if (underlying_type.value() == "") + { + CPP2_UFCS(require)(t, !(std::move(found_non_numeric)), + "if you write an enumerator with a non-numeric-literal value, you must specify the enumeration's underlying type"); + + if (!(bitwise)) { + if (cpp2::cmp_greater_eq(min_value,std::numeric_limits<cpp2::i8>::min()) && cpp2::cmp_less_eq(max_value,std::numeric_limits<cpp2::i8>::max())) { + underlying_type.value() = "i8"; + } + else {if (cpp2::cmp_greater_eq(min_value,std::numeric_limits<cpp2::i16>::min()) && cpp2::cmp_less_eq(max_value,std::numeric_limits<cpp2::i16>::max())) { + underlying_type.value() = "i16"; + } + else {if (cpp2::cmp_greater_eq(min_value,std::numeric_limits<cpp2::i32>::min()) && cpp2::cmp_less_eq(max_value,std::numeric_limits<cpp2::i32>::max())) { + underlying_type.value() = "i32"; + } + else {if (cpp2::cmp_greater_eq(std::move(min_value),std::numeric_limits<cpp2::i64>::min()) && cpp2::cmp_less_eq(max_value,std::numeric_limits<cpp2::i64>::max())) { + underlying_type.value() = "i64"; + } + else { + CPP2_UFCS(error)(t, "values are outside the range representable by the largest supported underlying signed type (i64)"); + }}}} + } + else { + auto umax {std::move(max_value) * cpp2::as_<cpp2::u64, 2>()}; + if 
(cpp2::cmp_less_eq(umax,std::numeric_limits<cpp2::u8>::max())) { + underlying_type.value() = "u8"; + } + else {if (cpp2::cmp_less_eq(umax,std::numeric_limits<cpp2::u16>::max())) { + underlying_type.value() = "u16"; + } + else {if (cpp2::cmp_less_eq(std::move(umax),std::numeric_limits<cpp2::u32>::max())) { + underlying_type.value() = "u32"; + } + else { + underlying_type.value() = "u64"; + }}} + } + } + +#line 1039 "reflect.h2" + // 2. Replace: Erase the contents and replace with modified contents + // + // Note that most values and functions are declared as '==' compile-time values, i.e. Cpp1 'constexpr' + + CPP2_UFCS(remove_marked_members)(t); + + // Generate the 'none' value if appropriate, and use that or + // else the first enumerator as the default-constructed value + auto default_value {CPP2_ASSERT_IN_BOUNDS(enumerators, 0).name}; + if (bitwise) { + default_value = "none"; + value_member_info e {"none", "", "0"}; + CPP2_UFCS(push_back)(enumerators, std::move(e)); + } + + // Generate all the private implementation + CPP2_UFCS(add_member)(t, " _value : " + cpp2::to_string(underlying_type.value()) + ";"); + CPP2_UFCS(add_member)(t, " private operator= : (implicit out this, _val: i64) == _value = cpp2::unsafe_narrow<" + cpp2::to_string(underlying_type.value()) + ">(_val);"); + + // Generate the bitwise operations + if (bitwise) { + CPP2_UFCS(add_member)(t, " operator|=: ( inout this, that ) == _value |= that._value;"); + CPP2_UFCS(add_member)(t, " operator&=: ( inout this, that ) == _value &= that._value;"); + CPP2_UFCS(add_member)(t, " operator^=: ( inout this, that ) == _value ^= that._value;"); + CPP2_UFCS(add_member)(t, " operator| : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value | that._value;"); + CPP2_UFCS(add_member)(t, " operator& : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == _value & that._value;"); + CPP2_UFCS(add_member)(t, " operator^ : ( this, that ) -> " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == 
_value ^ that._value;"); + CPP2_UFCS(add_member)(t, " has : ( inout this, that ) -> bool == _value & that._value;"); + CPP2_UFCS(add_member)(t, " set : ( inout this, that ) == _value |= that._value;"); + CPP2_UFCS(add_member)(t, " clear : ( inout this, that ) == _value &= that._value~;"); + } + + // Add the enumerators + for ( auto const& e : enumerators ) { + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(e.name) + " : " + cpp2::to_string(CPP2_UFCS(name)(t)) + " == " + cpp2::to_string(e.value) + ";"); + } + + // Generate the common functions + CPP2_UFCS(add_member)(t, " get_raw_value : (this) -> " + cpp2::to_string(std::move(underlying_type.value())) + " == _value;"); + CPP2_UFCS(add_member)(t, " operator= : (out this) == { _value = " + cpp2::to_string(std::move(default_value)) + "._value; }"); + CPP2_UFCS(add_member)(t, " operator= : (out this, that) == { }"); + CPP2_UFCS(add_member)(t, " operator<=> : (this, that) -> std::strong_ordering;"); +{ +std::string to_string = " to_string: (this) -> std::string = { \n"; + + // Provide a 'to_string' function to print enumerator name(s) + +#line 1084 "reflect.h2" + { + if (bitwise) { + to_string += " _ret : std::string = \"(\";\n"; + to_string += " _comma : std::string = ();\n"; + to_string += " if this == none { return \"(none)\"; }\n"; + } + + for ( + auto const& e : enumerators ) { + if (e.name != "_") {// ignore unnamed values + if (bitwise) { + if (e.name != "none") { + to_string += " if (this & " + cpp2::to_string(e.name) + ") == " + cpp2::to_string(e.name) + " { _ret += _comma + \"" + cpp2::to_string(e.name) + "\"; _comma = \", \"; }\n"; + } + } + else { + to_string += " if this == " + cpp2::to_string(e.name) + " { return \"" + cpp2::to_string(e.name) + "\"; }\n"; + } + } + } + + if (bitwise) { + to_string += " return _ret+\")\";\n}\n"; + } + else { + to_string += " return \"invalid " + cpp2::to_string(CPP2_UFCS(name)(t)) + " value\";\n}\n"; + } + + CPP2_UFCS(add_member)(t, std::move(to_string)); + } +} +#line 1114 
"reflect.h2" +} + +#line 1126 "reflect.h2" +auto cpp2_enum(meta::type_declaration& t) -> void +{ + // Let basic_enum do its thing, with an incrementing value generator + CPP2_UFCS(basic_enum)(t, + [](std::string& value, cpp2::in<std::string> specified_value) mutable -> void{ + if (!(CPP2_UFCS(empty)(specified_value))) { + value = specified_value; + }else { + auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS(value, 0), nullptr, 10)}; + value = cpp2::as_<std::string>((std::move(v) + 1)); + } + }, + false // disable bitwise operations + ); +} + +#line 1153 "reflect.h2" +auto flag_enum(meta::type_declaration& t) -> void +{ + // Let basic_enum do its thing, with a power-of-two value generator + CPP2_UFCS(basic_enum)(t, + [](std::string& value, cpp2::in<std::string> specified_value) mutable -> void{ + if (!(CPP2_UFCS(empty)(specified_value))) { + value = specified_value; + }else { + auto v {std::strtoll(&CPP2_ASSERT_IN_BOUNDS(value, 0), nullptr, 10)}; + if (cpp2::cmp_less(v,1)) { + value = "1"; + } + else { + value = cpp2::as_<std::string>((std::move(v) * 2)); + } + } + }, + true // enable bitwise operations + ); +} + +#line 1199 "reflect.h2" +auto cpp2_union(meta::type_declaration& t) -> void +{ + std::vector<value_member_info> alternatives {}; +{ +auto value = 0; + + // 1. 
Gather: All the user-written members, and find/compute the max size + +#line 1206 "reflect.h2" + for ( + + auto const& m : CPP2_UFCS(get_members)(t) ) { do + if ( CPP2_UFCS(is_member_object)(m)) + { + CPP2_UFCS(require)(m, CPP2_UFCS(is_public)(m) || CPP2_UFCS(is_default_access)(m), + "a union alternative cannot be protected or private"); + + CPP2_UFCS(require)(m, !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "is_")) + && !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "set_")), + "a union alternative's name cannot start with 'is_' or 'set_' - that could cause user confusion with the 'is_alternative' and 'set_alternative' generated functions"); + + auto mo {CPP2_UFCS(as_object)(m)}; + CPP2_UFCS(require)(mo, CPP2_UFCS(empty)(CPP2_UFCS(initializer)(mo)), + "a union alternative cannot have an initializer"); + + // Adding local variable 'e' to work around a Clang warning + value_member_info e {cpp2::as_<std::string>(CPP2_UFCS(name)(mo)), CPP2_UFCS(type)(mo), cpp2::as_<std::string>(value)}; + CPP2_UFCS(push_back)(alternatives, e); + + CPP2_UFCS(mark_for_removal_from_enclosing_type)(mo); + } while (false); ++value; } +} + +#line 1229 "reflect.h2" + std::string discriminator_type {}; + if (cpp2::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits<cpp2::i8>::max())) { + discriminator_type = "i8"; + } + else {if (cpp2::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits<cpp2::i16>::max())) { + discriminator_type = "i16"; + } + else {if (cpp2::cmp_less(CPP2_UFCS(ssize)(alternatives),std::numeric_limits<cpp2::i32>::max())) { + discriminator_type = "i32"; + } + else { + discriminator_type = "i64"; + }}} + +#line 1244 "reflect.h2" + // 2. 
Replace: Erase the contents and replace with modified contents + + CPP2_UFCS(remove_marked_members)(t); +{ +std::string storage = " _storage: cpp2::aligned_storage<cpp2::max( "; + + // Provide storage + +#line 1250 "reflect.h2" + { +{ +std::string comma = ""; + +#line 1252 "reflect.h2" + for ( + + auto const& e : alternatives ) { do { + storage += comma + "sizeof(" + cpp2::to_string(e.type) + ")"; + } while (false); comma = ", "; } +} + +#line 1258 "reflect.h2" + storage += "), cpp2::max( "; +{ +std::string comma = ""; + +#line 1261 "reflect.h2" + for ( + + auto const& e : alternatives ) { do { + storage += comma + "alignof(" + cpp2::to_string(e.type) + ")"; + } while (false); comma = ", "; } +} + +#line 1267 "reflect.h2" + storage += " )> = ();\n"; + CPP2_UFCS(add_member)(t, std::move(storage)); + } +} + + // Provide discriminator +#line 1272 "reflect.h2" + CPP2_UFCS(add_member)(t, " _discriminator: " + cpp2::to_string(std::move(discriminator_type)) + " = -1;\n"); + + // Add the alternatives: is_alternative, get_alternative, and set_alternative + for ( + auto const& a : alternatives ) + { + CPP2_UFCS(add_member)(t, " is_" + cpp2::to_string(a.name) + ": (this) -> bool = _discriminator == " + cpp2::to_string(a.value) + ";\n"); + + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = reinterpret_cast<* const " + cpp2::to_string(a.type) + ">(_storage&)*;\n"); + + CPP2_UFCS(add_member)(t, " " + cpp2::to_string(a.name) + ": (inout this) -> forward " + cpp2::to_string(a.type) + " pre(is_" + cpp2::to_string(a.name) + "()) = reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)*;\n"); + + CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, _value: " + cpp2::to_string(a.type) + ") = { if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _value); } else { 
reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = _value; } _discriminator = " + cpp2::to_string(a.value) + "; }\n"); + + CPP2_UFCS(add_member)(t, " set_" + cpp2::to_string(a.name) + ": (inout this, forward _args...: _) = { if !is_" + cpp2::to_string(a.name) + "() { _destroy(); std::construct_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&), _args...); } else { reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&)* = :" + cpp2::to_string(a.type) + " = (_args...); } _discriminator = " + cpp2::to_string(a.value) + "; }\n"); + } +{ +std::string destroy = " private _destroy: (inout this) = {\n"; + + // Add destroy + +#line 1291 "reflect.h2" + { + for ( + auto const& a : alternatives ) { + destroy += " if _discriminator == " + cpp2::to_string(a.value) + " { std::destroy_at( reinterpret_cast<*" + cpp2::to_string(a.type) + ">(_storage&) ); }\n"; + } + + destroy += " _discriminator = -1;\n"; + destroy += " }\n"; + CPP2_UFCS(add_member)(t, std::move(destroy)); + } +} + + // Add the destructor +#line 1303 "reflect.h2" + CPP2_UFCS(add_member)(t, " operator=: (move this) = { _destroy(); }"); + + // Add default constructor + CPP2_UFCS(add_member)(t, " operator=: (out this) = { }"); +{ +std::string value_set = ""; + + // Add copy/move construction and assignment + +#line 1310 "reflect.h2" + { + for ( + auto const& a : alternatives ) { + value_set += " if that.is_" + cpp2::to_string(a.name) + "() { set_" + cpp2::to_string(a.name) + "( that." 
+ cpp2::to_string(a.name) + "() ); }\n"; + } + value_set += " }\n"; + + CPP2_UFCS(add_member)(t, std::string(" operator=: (out this, that) = {\n") + + " _storage = ();\n" + + " _discriminator = -1;\n" + + value_set + ); + CPP2_UFCS(add_member)(t, std::string(" operator=: (inout this, that) = {\n") + + " _storage = _;\n" + + " _discriminator = _;\n" + + std::move(value_set) + ); + } +} +#line 1328 "reflect.h2" +} + +#line 1335 "reflect.h2" +auto print(cpp2::in<meta::type_declaration> t) -> void +{ + std::cout << CPP2_UFCS(print)(t) << "\n"; +} + +#line 1345 "reflect.h2" +[[nodiscard]] auto apply_metafunctions( + declaration_node& n, + type_declaration& rtype, + auto const& error + ) -> bool + +{ + if (cpp2::Default.has_handler() && !(CPP2_UFCS(is_type)(n)) ) { cpp2::Default.report_violation(""); } + + // Check for _names reserved for the metafunction implementation + for ( + auto const& m : CPP2_UFCS(get_members)(rtype) ) + { + CPP2_UFCS(require)(m, !(CPP2_UFCS(starts_with)(CPP2_UFCS(name)(m), "_")) || cpp2::cmp_greater(CPP2_UFCS(ssize)(CPP2_UFCS(name)(m)),1), + "a type that applies a metafunction cannot have a body that declares a name that starts with '_' - those names are reserved for the metafunction implementation"); + } + + // For each metafunction, apply it + for ( + auto const& meta : n.metafunctions ) + { + // Convert the name and any template arguments to strings + // and record that in rtype + auto name {CPP2_UFCS(to_string)((*cpp2::assert_not_null(meta)))}; + name = CPP2_UFCS(substr)(name, 0, CPP2_UFCS(find)(name, '<')); + + std::vector<std::string> args {}; + for ( + auto const& arg : CPP2_UFCS(template_arguments)((*cpp2::assert_not_null(meta))) ) + CPP2_UFCS(push_back)(args, CPP2_UFCS(to_string)(arg)); + + CPP2_UFCS(set_metafunction_name)(rtype, name, args); + + // Dispatch + // + if (name == "interface") { + interface(rtype); + } + else {if (name == "polymorphic_base") { + polymorphic_base(rtype); + } + else {if (name == "ordered") { + ordered(rtype); 
+ } + else {if (name == "weakly_ordered") { + weakly_ordered(rtype); + } + else {if (name == "partially_ordered") { + partially_ordered(rtype); + } + else {if (name == "copyable") { + copyable(rtype); + } + else {if (name == "basic_value") { + basic_value(rtype); + } + else {if (name == "value") { + value(rtype); + } + else {if (name == "weakly_ordered_value") { + weakly_ordered_value(rtype); + } + else {if (name == "partially_ordered_value") { + partially_ordered_value(rtype); + } + else {if (name == "struct") { + cpp2_struct(rtype); + } + else {if (name == "enum") { + cpp2_enum(rtype); + } + else {if (name == "flag_enum") { + flag_enum(rtype); + } + else {if (name == "union") { + cpp2_union(rtype); + } + else {if (name == "print") { + print(rtype); + } + else { + error("unrecognized metafunction name: " + name); + error("(temporary alpha limitation) currently the supported names are: interface, polymorphic_base, ordered, weakly_ordered, partially_ordered, copyable, basic_value, value, weakly_ordered_value, partially_ordered_value, struct, enum, flag_enum, union, print"); + return false; + }}}}}}}}}}}}}}} + + if (( + !(CPP2_UFCS(empty)(args)) + && !(CPP2_UFCS(arguments_were_used)(rtype)))) + + { + error(name + " did not use its template arguments - did you mean to write '" + name + " <" + CPP2_ASSERT_IN_BOUNDS(args, 0) + "> type' (with the spaces)?"); + return false; + } + } + + return true; +} + +#line 1445 "reflect.h2" +} + +} + +#endif diff --git a/CompilerDriver/cc2/source/reflect.h2 b/CompilerDriver/cc2/source/reflect.h2 new file mode 100644 index 0000000..072087e --- /dev/null +++ b/CompilerDriver/cc2/source/reflect.h2 @@ -0,0 +1,1447 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.


//===========================================================================
//  Reflection and meta
//===========================================================================

#include "parse.h"

cpp2: namespace = {

meta: namespace = {


//-----------------------------------------------------------------------
//
//  Compiler services
//
//  State and helpers shared by every reflection wrapper below: the
//  error list to report into, the token store handed to the parser
//  for generated code, and the name/arguments of the metafunction
//  currently being applied
//
//-----------------------------------------------------------------------
//

compiler_services: @polymorphic_base @copyable type =
{
    //  Common data members
    //
    errors               : *std::vector<error_entry>;
    errors_original_size : int;
    generated_tokens     : *std::deque<token>;
    parser               : cpp2::parser;
    metafunction_name    : std::string = ();
    metafunction_args    : std::vector<std::string> = ();
    metafunctions_used   : bool = false;

    //  Constructor
    //
    //  Remembers both pointers, records how many errors already
    //  existed, and initializes the parser from the error list
    //
    operator=: (
        out this,
        errors_          : *std::vector<error_entry>,
        generated_tokens_: *std::deque<token>
    )
    = {
        errors = errors_;
        errors_original_size = cpp2::unsafe_narrow<int>(std::ssize(errors*));
        generated_tokens = generated_tokens_;
        parser = errors*;
    }

    //  Common API
    //
    //  Record which metafunction is being applied and with which arguments
    //
    set_metafunction_name: (inout this, name: std::string_view, args: std::vector<std::string>) = {
        metafunction_name  = name;
        metafunction_args  = args;
        //  An empty argument list starts out as "used", so only a
        //  non-empty list can later be reported as ignored
        metafunctions_used = args.empty();
    }

    get_metafunction_name: (this) -> std::string_view = metafunction_name;

    //  Returns the argument at 'index', or an empty string if 'index'
    //  is out of range; either way the arguments now count as used
    //
    get_argument: (inout this, index: int) -> std::string = {
        metafunctions_used = true;
        if (0 <= index < metafunction_args.ssize()) {
            return metafunction_args[index];
        }
        return "";
    }

    //  Returns a copy of all the arguments and marks them as used
    //
    get_arguments: (inout this) -> std::vector<std::string> = {
        metafunctions_used = true;
        return metafunction_args;
    }

    arguments_were_used: (this) -> bool = metafunctions_used;

    //  Lexes and parses 'source' as a single declaration
    //
    //  On failure an error quoting the original text is reported and
    //  'ret' is left null
    //
    //  NOTE(review): generated_lines and generated_lexers are not
    //  declared in this type - presumably shared storage that keeps
    //  the generated lines/tokens alive; confirm where they are declared
    //
    protected parse_statement: (
        inout this,
        copy source: std::string_view
    )
        -> (ret: std::unique_ptr<statement_node>)
    = {
        //  'source' is consumed by the splitting loop below, so keep
        //  the full text around for the diagnostic
        original_source := source;

        generated_lines.push_back( std::vector<source_line>() );
        lines := generated_lines.back()&;

        add_line := :(s: std::string_view) = {
            _ = lines$*.emplace_back( s, source_line::category::cpp2 );
        };

        //  First split this string into source_lines
        //
        (copy newline_pos := source.find('\n'))
        if  source.ssize() > 1
            && newline_pos != source.npos
        {
            while newline_pos != std::string_view::npos
            {
                add_line( source.substr(0, newline_pos) );
                source.remove_prefix( newline_pos+1 );
                newline_pos = source.find('\n');
            }
        }

        //  Whatever follows the last newline (or the whole text, if
        //  there was no newline) is the final line
        if !source.empty() {
            add_line( source );
        }

        //  Now lex this source fragment to generate
        //  a single grammar_map entry, whose .second
        //  is the vector of tokens
        _ = generated_lexers.emplace_back( errors* );
        tokens := generated_lexers.back()&;
        tokens*.lex( lines*, true );

        assert( std::ssize(tokens* .get_map()) == 1 );

        //  Now parse this single declaration from
        //  the lexed tokens
        ret = parser.parse_one_declaration(
                tokens*.get_map().begin()*.second,
                generated_tokens*
              );
        if !ret {
            error( "parse failed - the source string is not a valid statement:\n(original_source)$");
        }
    }

    //  Source position attached to reported errors; this base version
    //  returns a default-constructed position and wrappers override it
    //
    position: (virtual this)
        -> source_position
    = {
        return ();
    }

    //  Error diagnosis and handling, integrated with compiler output
    //  Unlike a contract violation, .require continues further processing
    //
    require:(
        this,
        b   : bool,
        msg : std::string_view
    )
    = {
        if !b {
            error( msg );
        }
    }

    //  Appends 'msg' to the error list at position(), prefixed with the
    //  metafunction being applied when one has been set
    //
    error: (this, msg: std::string_view)
    = {
        message := msg as std::string;
        if !metafunction_name.empty() {
            message = "while applying @(metafunction_name)$ - (message)$";
        }
        _ = errors*.emplace_back( position(), message);
    }

    //  Enable custom contracts on this object, integrated with compiler output
    //  Unlike .require, a contract violation stops further processing
    //
    report_violation: (this, msg) = {
        error(msg);
        throw( std::runtime_error(" ==> programming bug found in metafunction @(metafunction_name)$ - contract violation - see previous errors") );
    }

    has_handler:(this) true;
}


/*
//-----------------------------------------------------------------------
//
//  Type IDs
//
//-----------------------------------------------------------------------
//

//  All type_ids are wrappers around a pointer to node
//
type_id: @polymorphic_base @copyable type =
{
    this: compiler_services = ();

    n: *type_id_node;

    protected operator=: (
        out this,
        n_: *type_id_node,
        s : compiler_services
    )
    = {
        compiler_services = s;
        n = n_;
        assert( n, "a meta::type_id must point to a valid type_id_node, not null" );
    }

    is_wildcard         : (this) -> bool        = n*.is_wildcard();
    is_pointer_qualified: (this) -> bool        = n*.is_pointer_qualified();
    template_args_count : (this) -> int         = n*.template_arguments().ssize();
    to_string           : (this) -> std::string = n*.to_string();

    position: (override this) -> source_position = n*.position();
}
*/


//-----------------------------------------------------------------------
//
//  Declarations
//
//-----------------------------------------------------------------------
//

//  All declarations are wrappers around a pointer to node
//
//  declaration_base holds that pointer and a copy of the compiler
//  services it was created with
//
declaration_base: @polymorphic_base @copyable type =
{
    this: compiler_services = ();

    protected n: *declaration_node;

    //  'n_' must not be null
    //
    protected operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    )
    = {
        compiler_services = s;
        n = n_;
        assert( n, "a meta::declaration must point to a valid declaration_node, not null" );
    }

    //  Errors reported through this wrapper use the node's own position
    //
    position: (override this) -> source_position = n*.position();

    print: (this) -> std::string = n*.pretty_print_visualize(0);
}

//-----------------------------------------------------------------------
//  All declarations
//
//  Queries and access-control changes common to every kind of
//  declaration; each member forwards to the wrapped node 'n'
//
declaration: @polymorphic_base @copyable type =
{
    this: declaration_base = ();

    operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    )
    = {
        declaration_base = (n_, s);
    }

    is_public        : (this) -> bool = n*.is_public();
    is_protected     : (this) -> bool = n*.is_protected();
    is_private       : (this) -> bool = n*.is_private();
    is_default_access: (this) -> bool = n*.is_default_access();

    //  Same node calls as make_* below, with the result discarded
    //
    default_to_public   : (inout this) = _ = n*.make_public();
    default_to_protected: (inout this) = _ = n*.make_protected();
    default_to_private  : (inout this) = _ = n*.make_private();

    //  Return whatever the node's make_* function reports
    //
    make_public         : (inout this) -> bool = n*.make_public();
    make_protected      : (inout this) -> bool = n*.make_protected();
    make_private        : (inout this) -> bool = n*.make_private();

    has_name            : (this) -> bool = n*.has_name();
    has_name            : (this, s: std::string_view) -> bool = n*.has_name(s);

    //  The declaration's name, or an empty string if it has none
    //
    name: (this) -> std::string_view = {
        if has_name() { return n*.name()*.as_string_view(); }
        else          { return ""; }
    }

    has_initializer: (this) -> bool = n*.has_initializer();

    is_global        : (this) -> bool = n*.is_global();
    is_function      : (this) -> bool = n*.is_function();
    is_object        : (this) -> bool = n*.is_object();
    is_base_object   : (this) -> bool = n*.is_base_object();
    is_member_object : (this) -> bool = n*.is_member_object();
    is_type          : (this) -> bool = n*.is_type();
    is_namespace     : (this) -> bool = n*.is_namespace();
    is_alias         : (this) -> bool = n*.is_alias();

    is_type_alias      : (this) -> bool = n*.is_type_alias();
    is_namespace_alias : (this) -> bool = n*.is_namespace_alias();
    is_object_alias    : (this) -> bool = n*.is_object_alias();

    is_function_expression : (this) -> bool = n*.is_function_expression();

    //  Re-wrap the same node as a more specific declaration kind; the
    //  target constructors assert that the node really is of that kind
    //
    as_function : (this) -> function_declaration = function_declaration(n, this);
    as_object   : (this) -> object_declaration   = object_declaration(n, this);
    as_type     : (this) -> type_declaration     = type_declaration(n, this);
    as_alias    : (this) -> alias_declaration    = alias_declaration(n, this);

    get_parent  : (this) -> declaration          = declaration(n*.parent_declaration, this);

    parent_is_function   : (this) -> bool = n*.parent_is_function();
    parent_is_object     : (this) -> bool = n*.parent_is_object();
    parent_is_type       : (this) -> bool = n*.parent_is_type();
    parent_is_namespace  : (this) -> bool = n*.parent_is_namespace();
    parent_is_alias      : (this) -> bool = n*.parent_is_alias();

    parent_is_type_alias      : (this) -> bool = n*.parent_is_type_alias();
    parent_is_namespace_alias : (this) -> bool = n*.parent_is_namespace_alias();
    parent_is_object_alias    : (this) -> bool = n*.parent_is_object_alias();

    parent_is_polymorphic: (this) -> bool = n*.parent_is_polymorphic();

    mark_for_removal_from_enclosing_type: (inout this)
        pre<Type>( parent_is_type() )   // this precondition should be sufficient ...
    = {
        test := n*.type_member_mark_for_removal();
        assert( test );                 // ... to ensure this assert is true
    }
}


//-----------------------------------------------------------------------
//  Function declarations
//
function_declaration: @copyable type =
{
    this: declaration = ();

    //  The node must be a function declaration
    //
    operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    ) =
    {
        declaration = (n_, s);
        assert( n*.is_function() );
    }

    index_of_parameter_named     : (this, s: std::string_view) -> int  = n*.index_of_parameter_named(s);
    has_parameter_named          : (this, s: std::string_view) -> bool = n*.has_parameter_named(s);
    has_in_parameter_named       : (this, s: std::string_view) -> bool = n*.has_in_parameter_named(s);
    has_out_parameter_named      : (this, s: std::string_view) -> bool = n*.has_out_parameter_named(s);
    has_move_parameter_named     : (this, s: std::string_view) -> bool = n*.has_move_parameter_named(s);
    first_parameter_name         : (this) -> std::string = n*.first_parameter_name();

    has_parameter_with_name_and_pass: (this, s: std::string_view, pass: passing_style) -> bool
                                                  = n*.has_parameter_with_name_and_pass(s, pass);
    is_function_with_this        : (this) -> bool = n*.is_function_with_this();
    is_virtual                   : (this) -> bool = n*.is_virtual_function();
    is_defaultable               : (this) -> bool = n*.is_defaultable_function();
    is_constructor               : (this) -> bool = n*.is_constructor();
    is_default_constructor       : (this) -> bool = n*.is_default_constructor();
    is_move                      : (this) -> bool = n*.is_move();
    is_swap                      : (this) -> bool = n*.is_swap();
    is_constructor_with_that     : (this) -> bool = n*.is_constructor_with_that();
    is_constructor_with_in_that  : (this) -> bool = n*.is_constructor_with_in_that();
    is_constructor_with_move_that: (this) -> bool = n*.is_constructor_with_move_that();
    is_assignment                : (this) -> bool = n*.is_assignment();
    is_assignment_with_that      : (this) -> bool = n*.is_assignment_with_that();
    is_assignment_with_in_that   : (this) -> bool = n*.is_assignment_with_in_that();
    is_assignment_with_move_that : (this) -> bool = n*.is_assignment_with_move_that();
    is_destructor                : (this) -> bool = n*.is_destructor();

    //  True for any constructor or assignment that takes 'that'
    //
    is_copy_or_move              : (this) -> bool = is_constructor_with_that() || is_assignment_with_that();

    has_declared_return_type     : (this) -> bool = n*.has_declared_return_type();
    has_deduced_return_type      : (this) -> bool = n*.has_deduced_return_type();
    has_bool_return_type         : (this) -> bool = n*.has_bool_return_type();
    has_non_void_return_type     : (this) -> bool = n*.has_non_void_return_type();

    unnamed_return_type          : (this) -> std::string = n*.unnamed_return_type_to_string();

    //  One object_declaration wrapper per function parameter
    //
    get_parameters: (this)
        -> std::vector<object_declaration>
    = {
        ret: std::vector<object_declaration> = ();
        for n*.get_function_parameters() do (param) {
            _ = ret.emplace_back( param*.declaration*&, this );
        }
        return ret;
    }

    is_binary_comparison_function: (this) -> bool = n*.is_binary_comparison_function();

    default_to_virtual           : (inout this) = _ = n*.make_function_virtual();

    make_virtual                 : (inout this) -> bool = n*.make_function_virtual();

    //  Parses 'source' as a statement and installs it as this
    //  function's initializer; reports an error and leaves the
    //  function unchanged if 'source' does not parse
    //
    add_initializer: (inout this, source: std::string_view)
        pre<this> (!has_initializer(), "cannot add an initializer to a function that already has one")
        pre<this> (parent_is_type(),   "cannot add an initializer to a function that isn't in a type scope")
    = {
        //require( !has_initializer(),
        //         "cannot add an initializer to a function that already has one");
        //require( parent_is_type(),
        //         "cannot add an initializer to a function that isn't in a type scope");

        stmt := parse_statement(source);
        if !(stmt as bool) {
            error( "cannot add an initializer that is not a valid statement");
            return;
        }
        require (n*.add_function_initializer(stmt),
                 std::string("unexpected error while attempting to add initializer"));
    }
}


//-----------------------------------------------------------------------
//  Object declarations
//
object_declaration: @copyable type =
{
    this: declaration = ();

    //  The node must be an object declaration
    //
    operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    ) =
    {
        declaration = (n_, s);
        assert( n*.is_object() );
    }

    is_const         : (this) -> bool = n*.is_const();
    has_wildcard_type: (this) -> bool = n*.has_wildcard_type();

    //  The object's type as text; an error is reported (and the text
    //  still returned) if it contains the "(*ERROR*)" marker
    //
    type: (this) -> std::string = {
        ret := n*.object_type();
        require( !contains(ret, "(*ERROR*)"),
                 "cannot to_string this type: " + ret);
        return ret;
    }

    //  The object's initializer as text, with the same "(*ERROR*)" check
    //
    initializer: (this) -> std::string = {
        ret := n*.object_initializer();
        require( !contains(ret, "(*ERROR*)"),
                 "cannot to_string this initializer: " + ret);
        return ret;
    }
}


//-----------------------------------------------------------------------
//  Type declarations
//
type_declaration: @copyable type =
{
    this: declaration = ();

    //  The node must be a type declaration
    //
    operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    ) =
    {
        declaration = (n_, s);
        assert( n*.is_type() );
    }

    //  Reports an error on every member whose name is 'name' or any
    //  of the further names in 'etc' (handled one name per recursive call)
    //
    reserve_names: (this, name: std::string_view, forward etc...) =
    {                           // etc is not declared ':string_view' for compatibility with GCC 10.x
        for get_members()
        do  (m) {
            m.require( !m.has_name( name ),
                       "in a '(get_metafunction_name())$' type, the name '(name)$' is reserved for use by the '(get_metafunction_name())$' implementation");
        }
        if constexpr !CPP2_PACK_EMPTY(etc) {
            reserve_names( etc... );
        }
    }

    is_polymorphic: (this) -> bool = n*.is_polymorphic();
    is_final      : (this) -> bool = n*.is_type_final();
    make_final    : (inout this) -> bool = n*.make_type_final();

    //  Wrappers for the type-scope function declarations
    //
    get_member_functions: (this)
        -> std::vector<function_declaration>
    = {
        ret: std::vector<function_declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::functions)
        do  (d) {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Only the member functions that have no initializer and are
    //  neither virtual nor defaultable
    //
    get_member_functions_needing_initializer: (this)
        -> std::vector<function_declaration>
    = {
        ret: std::vector<function_declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::functions)
        do  (d)
        if  !d*.has_initializer()
            && !d*.is_virtual_function()
            && !d*.is_defaultable_function()
        {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Wrappers for the type-scope object declarations
    //
    get_member_objects: (this)
        -> std::vector<object_declaration>
    = {
        ret: std::vector<object_declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::objects) do (d) {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Wrappers for the type-scope type declarations
    //
    get_member_types: (this)
        -> std::vector<type_declaration>
    = {
        ret: std::vector<type_declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::types) do (d) {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Wrappers for the type-scope alias declarations
    //
    get_member_aliases: (this)
        -> std::vector<alias_declaration>
    = {
        ret: std::vector<alias_declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::aliases) do (d) {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Wrappers for every type-scope declaration, whatever its kind
    //
    get_members: (this)
        -> std::vector<declaration>
    = {
        ret: std::vector<declaration> = ();
        for n*.get_type_scope_declarations(declaration_node::all) do (d) {
            _ = ret.emplace_back( d, this );
        }
        return ret;
    }

    //  Reports which of the four (out|inout this, in|move that)
    //  operator= forms the node found declared
    //
    query_declared_value_set_functions: (this)
        -> (
            out_this_in_that     : bool,
            out_this_move_that   : bool,
            inout_this_in_that   : bool,
            inout_this_move_that : bool
            )
    = {
        declared := n*.find_declared_value_set_functions();
        out_this_in_that     = declared.out_this_in_that     != nullptr;
        out_this_move_that   = declared.out_this_move_that   != nullptr;
        inout_this_in_that   = declared.inout_this_in_that   != nullptr;
        inout_this_move_that = declared.inout_this_move_that != nullptr;
    }

    //  Parses 'source' and adds the result as a new member of this type
    //
    //  Reports an error and leaves the type unchanged if 'source' does
    //  not parse or parses to something other than a declaration
    //
    add_member: (inout this, source: std::string_view)
    = {
        decl := parse_statement(source);
        if !(decl as bool) {
            error("the provided source string is not a valid statement");
            return;
        }
        if !decl*.is_declaration() {
            error("cannot add a member that is not a declaration");
            //  Stop here, as the check above does: the statement has
            //  already been rejected, so don't also try to add it
            return;
        }
        require( n*.add_type_member(decl),
                 std::string("unexpected error while attempting to add member:\n") + source );
    }

    remove_marked_members: (inout this) = n*.type_remove_marked_members();
    remove_all_members   : (inout this) = n*.type_remove_all_members();

    disable_member_function_generation: (inout this) = n*.type_disable_member_function_generation();
}


//-----------------------------------------------------------------------
//  Alias declarations
//
alias_declaration: @copyable type =
{
    this: declaration = ();

    //  The node must be an alias declaration
    //
    operator=: (
        out this,
        n_: *declaration_node,
        s : compiler_services
    ) =
    {
        declaration = (n_, s);
        assert( n*.is_alias() );
    }
}


//-----------------------------------------------------------------------
//
//  Metafunctions - these are hardwired for now until we get to the
//  step of writing a Cpp2 interpreter to run inside the compiler
//
//-----------------------------------------------------------------------
//

//-----------------------------------------------------------------------
//  Some common metafunction helpers (metafunctions are just functions,
//  so they can be factored as usual)
//
//  Adds an empty-bodied virtual destructor to 't'
//
add_virtual_destructor: (inout t: meta::type_declaration) =
{
    t.add_member( "operator=: (virtual move this) = { }");
}


//-----------------------------------------------------------------------
//
//     "...
//      an abstract base class defines an interface ..."
//
//          -- Stroustrup (The Design and Evolution of C++, 12.3.1)
//
//-----------------------------------------------------------------------
//
//  interface
//
//  an abstract base class having only pure virtual functions
//
//  Rejects data members, copy/move functions and function bodies,
//  makes every function public and virtual, and supplies a virtual
//  destructor when the user did not write a destructor
//
interface: (inout t: meta::type_declaration) =
{
    found_destructor := false;

    for t.get_members() do (inout member)
    {
        member.require( !member.is_object(),
                        "interfaces may not contain data objects");

        if member.is_function()
        {
            func := member.as_function();

            //  Checks first ...
            func.require( !func.is_copy_or_move(),
                          "interfaces may not copy or move; consider a virtual clone() instead");
            func.require( !func.has_initializer(),
                          "interface functions must not have a function body; remove the '=' initializer");
            func.require( func.make_public(),
                          "interface functions must be public");

            //  ... then the adjustment, and note whether this was the destructor
            func.default_to_virtual();
            if func.is_destructor() {
                found_destructor = true;
            }
        }
    }

    if !found_destructor {
        add_virtual_destructor( t );
    }
}


//-----------------------------------------------------------------------
//
//     "C.35: A base class destructor should be either public and
//      virtual, or protected and non-virtual."
//
//     "[C.43] ... a base class should not be copyable, and so does not
//      necessarily need a default constructor."
//
//          -- Stroustrup, Sutter, et al. (C++ Core Guidelines)
//
//-----------------------------------------------------------------------
//
//  polymorphic_base
//
//  A pure polymorphic base type that is not copyable, and whose
//  destructor is either public and virtual or protected and nonvirtual.
//
//  Unlike an interface, it can have nonpublic and nonvirtual functions.
//
polymorphic_base: (inout t: meta::type_declaration) =
{
    found_destructor := false;

    for t.get_member_functions() do (inout func)
    {
        //  Functions without an explicit access specifier become public
        if func.is_default_access() {
            func.default_to_public();
        }

        func.require( !func.is_copy_or_move(),
                      "polymorphic base types may not copy or move; consider a virtual clone() instead");

        if func.is_destructor()
        {
            found_destructor = true;

            //  The two destructor shapes that are allowed
            public_and_virtual       := (func.is_public() || func.is_default_access()) && func.is_virtual();
            protected_and_nonvirtual := func.is_protected() && !func.is_virtual();

            func.require( public_and_virtual || protected_and_nonvirtual,
                          "a polymorphic base type destructor must be public and virtual, or protected and nonvirtual");
        }
    }

    //  No user-written destructor: supply a virtual one
    if !found_destructor {
        add_virtual_destructor( t );
    }
}


//-----------------------------------------------------------------------
//
//     "... A totally ordered type ... requires operator<=> that
//      returns std::strong_ordering. If the function is not
//      user-written, a lexicographical memberwise implementation
//      is generated by default..."
//
//          -- P0707R4, section 3
//
//      Note: This feature derived from Cpp2 was already adopted
//            into Standard C++ via paper P0515, so most of the
//            heavy lifting is done by the Cpp1 C++20/23 compiler,
//            including the memberwise default semantics
//            (In contrast, cppfront has to do the work itself for
//            default memberwise semantics for operator= assignment
//            as those aren't yet part of Standard C++)
//
//-----------------------------------------------------------------------
//

//  Shared implementation of the three ordering metafunctions: checks
//  the return type of every user-written operator<=>, and declares an
//  operator<=> with the requested ordering if there is none
//
ordered_impl: (
    inout t:  meta::type_declaration,
    ordering: std::string_view  // must be "strong_ordering" etc.
) =
{
    ordering_name := ordering as std::string;
    found_spaceship := false;

    for t.get_member_functions() do (inout func)
    {
        if func.has_name("operator<=>")
        {
            found_spaceship = true;

            //  The declared return type has to mention the requested ordering
            declared_return := func.unnamed_return_type();
            if declared_return.find(ordering) == declared_return.npos {
                func.error( "operator<=> must return std::" + ordering_name );
            }
        }
    }

    if !found_spaceship {
        t.add_member( "operator<=>: (this, that) -> std::" + ordering_name + ";" );
    }
}

//-----------------------------------------------------------------------
//  ordered - a totally ordered type
//
//  Note: the ordering that should be encouraged as default gets the nice name
//
ordered: (inout t: meta::type_declaration) =
{
    t.ordered_impl( "strong_ordering" );
}

//-----------------------------------------------------------------------
//  weakly_ordered - a weakly ordered type
//
weakly_ordered: (inout t: meta::type_declaration) =
{
    t.ordered_impl( "weak_ordering" );
}

//-----------------------------------------------------------------------
//  partially_ordered - a partially ordered type
//
partially_ordered: (inout t: meta::type_declaration) =
{
    t.ordered_impl( "partial_ordering" );
}


//-----------------------------------------------------------------------
//
//     "A value is ... a regular type. It must have all public
//      default construction, copy/move construction/assignment,
//      and destruction, all of which are generated by default
//      if not user-written; and it must not have any protected
//      or virtual functions (including the destructor)."
//
//          -- P0707R4, section 3
//
//-----------------------------------------------------------------------
//
//  copyable
//
//  A type with (copy and move) x (construction and assignment)
//
copyable: (inout t: meta::type_declaration) =
{
    declared := t.query_declared_value_set_functions();

    //  The general (out this, that) function is already written:
    //  there is nothing to check and nothing to generate
    if declared.out_this_in_that {
        return;
    }

    //  Writing any of the more-specific functions means the user has
    //  opted into customized semantics, so the general one can't be
    //  safely generated on their behalf - they must write it too
    wrote_a_specific_one :=
        declared.out_this_move_that
        || declared.inout_this_in_that
        || declared.inout_this_move_that;

    if wrote_a_specific_one {
        t.error( "this type is partially copyable/movable - when you provide any of the more-specific operator= signatures, you must also provide the one with the general signature (out this, that); alternatively, consider removing all the operator= functions and let them all be generated for you with default memberwise semantics" );
    }
    else {
        t.add_member( "operator=: (out this, that) = { }");
    }
}

//-----------------------------------------------------------------------
//
//  basic_value
//
//  A regular type: copyable, plus has public default construction
//  and no protected or virtual functions
//
basic_value: (inout t: meta::type_declaration) =
{
    copyable( t );

    found_default_ctor := false;

    for t.get_member_functions() do (inout func)
    {
        if func.is_default_constructor() {
            found_default_ctor = true;
        }

        func.require( !(func.is_protected() || func.is_virtual()),
                      "a value type may not have a protected or virtual function");
        func.require( !func.is_destructor() || func.is_public() || func.is_default_access(),
                      "a value type may not have a non-public destructor");
    }

    //  No user-written default constructor: supply one
    if !found_default_ctor {
        t.add_member( "operator=: (out this) = { }");
    }
}

//-----------------------------------------------------------------------
//
//     "A 'value' is a totally ordered basic_value..."
//
//          -- P0707R4, section 3
//
//  value - a value type that is totally ordered
//
//  Note: the ordering that should be encouraged as default gets the nice name
//
value: (inout t: meta::type_declaration) =
{
    ordered( t );
    basic_value( t );
}

//  weakly_ordered_value - a value type that is weakly ordered
//
weakly_ordered_value: (inout t: meta::type_declaration) =
{
    weakly_ordered( t );
    basic_value( t );
}

//  partially_ordered_value - a value type that is partially ordered
//
partially_ordered_value: (inout t: meta::type_declaration) =
{
    partially_ordered( t );
    basic_value( t );
}


//-----------------------------------------------------------------------
//
//     "By definition, a `struct` is a `class` in which members
//      are by default `public`; that is,
//
//          struct s { ...
//
//      is simply shorthand for
//
//          class s { public: ...
//
//      ... Which style you use depends on circumstances and taste.
//      I usually prefer to use `struct` for classes that have all
//      data `public`."
//
//          -- Stroustrup (The C++ Programming Language, 3rd ed., p. 234)
//
//-----------------------------------------------------------------------
//
//  struct
//
//  a type with only public bases, objects, and functions,
//  no virtual functions, and no user-defined constructors
//  (i.e., no invariants) or assignment or destructors.
//
struct: (inout t: meta::type_declaration) =
{
    for t.get_members() do (inout member)
    {
        member.require( member.make_public(),
                        "all struct members must be public");

        if member.is_function()
        {
            func := member.as_function();

            //  Both diagnostics are reported through the type itself
            t.require( !func.is_virtual(),
                       "a struct may not have a virtual function");
            t.require( !func.has_name("operator="),
                       "a struct may not have a user-defined operator=");
        }
    }

    t.disable_member_function_generation();
}


//-----------------------------------------------------------------------
//
//     "C enumerations constitute a curiously half-baked concept. ...
//      the cleanest way out was to deem each enumeration a separate type."
//
//          -- Stroustrup (The Design and Evolution of C++, 11.7)
//
//     "An enumeration is a distinct type ...
//      with named constants"
//
//          -- ISO C++ Standard
//
//-----------------------------------------------------------------------
//
//  basic_enum
//
//  a type together with named constants that are its possible values
//

//  One gathered member: its name, its type as text (left empty by
//  basic_enum), and its value as text
//
value_member_info: @struct type = {
    name  : std::string;
    type  : std::string;
    value : std::string;
}

//  Shared implementation of the enumeration metafunctions
//
//  t        the type being rewritten
//  nextval  callable invoked as nextval(value, init) for each
//           enumerator: 'value' holds the previous enumerator's value
//           on entry ("-1" before the first) and must hold this
//           enumerator's value on return; 'init' is the enumerator's
//           initializer text, empty if there was none
//  bitwise  when true, also reserves the flag helper names, adds a
//           'none' value, and generates the bitwise operators
//
basic_enum: (
    inout t : meta::type_declaration,
    nextval ,
    bitwise : bool
    )
= {
    enumerators     : std::vector<value_member_info> = ();
    min_value       : i64 = ();
    max_value       : i64 = ();
    underlying_type : std::string;

    //  Names the generated members will use must not be user-declared
    t.reserve_names( "operator=", "operator<=>" );
    if bitwise {
        t.reserve_names( "has", "set", "clear", "to_string", "get_raw_value", "none" );
    }

    //  1. Gather: The names of all the user-written members, and find/compute the type

    underlying_type = t.get_argument(0);    // use the first template argument, if there was one

    found_non_numeric := false;

    (copy value: std::string = "-1")
    for  t.get_members()
    do   (m)
    if   m.is_member_object()
    {
        m.require( m.is_public() || m.is_default_access(),
                   "an enumerator cannot be protected or private");

        mo := m.as_object();
        if !mo.has_wildcard_type() {
            mo.error( "an explicit underlying type should be specified as a template argument to the metafunction - try 'enum<u16>' or 'flag_enum<u64>'");
        }

        init := mo.initializer();

        //  Once a non-numeric initializer has been seen, only 'none'
        //  may still use a default or numeric value
        is_default_or_numeric := is_empty_or_a_decimal_number(init);
        found_non_numeric |= !init.empty() && !is_default_or_numeric;
        m.require( !is_default_or_numeric || !found_non_numeric || mo.has_name("none"),
                   "(mo.name())$: enumerators with non-numeric values must come after all default and numeric values");

        nextval( value, init );

        //  Track the numeric range seen so far; it selects the default
        //  underlying type below
        v := std::strtoll(value[0]&, nullptr, 10); // for non-numeric values we'll just get 0 which is okay for now
        if v < min_value {
            min_value = v;
        }
        if v > max_value {
            max_value = v;
        }

        //  Adding local variable 'e' to work around a Clang warning
        e: value_member_info = ( mo.name() as std::string, "", value );
        enumerators.push_back( e );

        //  The user-written member is replaced by a generated one in step 2
        mo.mark_for_removal_from_enclosing_type();
    }

    if (enumerators.empty()) {
        t.error( "an enumeration must contain at least one enumerator value");
        return;
    }

    //  Compute the default underlying type, if it wasn't explicitly specified
    if underlying_type == ""
    {
        t.require( !found_non_numeric,
                   "if you write an enumerator with a non-numeric-literal value, you must specify the enumeration's underlying type");

        if !bitwise {
            //  Smallest signed type whose range covers [min_value, max_value]
            if min_value >= std::numeric_limits<i8>::min() && max_value <= std::numeric_limits<i8>::max() {
                underlying_type = "i8";
            }
            else if min_value >= std::numeric_limits<i16>::min() && max_value <= std::numeric_limits<i16>::max() {
                underlying_type = "i16";
            }
            else if min_value >= std::numeric_limits<i32>::min() && max_value <= std::numeric_limits<i32>::max() {
                underlying_type = "i32";
            }
            else if min_value >= std::numeric_limits<i64>::min() && max_value <= std::numeric_limits<i64>::max() {
                underlying_type = "i64";
            }
            else {
                t.error( "values are outside the range representable by the largest supported underlying signed type (i64)" );
            }
        }
        else {
            //  Smallest unsigned type that can hold twice the largest value
            umax := max_value * 2 as u64;
            if umax <= std::numeric_limits<u8>::max() {
                underlying_type = "u8";
            }
            else if umax <= std::numeric_limits<u16>::max() {
                underlying_type = "u16";
            }
            else if umax <= std::numeric_limits<u32>::max() {
                underlying_type = "u32";
            }
            else {
                underlying_type = "u64";
            }
        }
    }


    //  2. Replace: Erase the contents and replace with modified contents
    //
    //  Note that most values and functions are declared as '==' compile-time values, i.e. Cpp1 'constexpr'

    t.remove_marked_members();

    //  Generate the 'none' value if appropriate, and use that or
    //  else the first enumerator as the default-constructed value
    default_value := enumerators[0].name;
    if bitwise{
        default_value = "none";
        e: value_member_info = ( "none", "", "0");
        enumerators.push_back( e );
    }

    //  Generate all the private implementation
    t.add_member( " _value : (underlying_type)$;");
    t.add_member( " private operator= : (implicit out this, _val: i64) == _value = cpp2::unsafe_narrow<(underlying_type)$>(_val);");

    //  Generate the bitwise operations
    if bitwise {
        t.add_member( " operator|=: ( inout this, that ) == _value |= that._value;");
        t.add_member( " operator&=: ( inout this, that ) == _value &= that._value;");
        t.add_member( " operator^=: ( inout this, that ) == _value ^= that._value;");
        t.add_member( " operator| : ( this, that ) -> (t.name())$ == _value | that._value;");
        t.add_member( " operator& : ( this, that ) -> (t.name())$ == _value & that._value;");
        t.add_member( " operator^ : ( this, that ) -> (t.name())$ == _value ^ that._value;");
        t.add_member( " has : ( inout this, that ) -> bool == _value & that._value;");
        t.add_member( " set : ( inout this, that ) == _value |= that._value;");
        t.add_member( " clear : ( inout this, that ) == _value &= that._value~;");
    }

    //  Add the enumerators
    for enumerators do (e) {
        t.add_member( " (e.name)$ : (t.name())$ == (e.value)$;");
    }

    //  Generate the common functions
    t.add_member( " get_raw_value : (this) -> (underlying_type)$ == _value;");
    t.add_member( " operator= : (out this) == { _value = (default_value)$._value; }");
    t.add_member( " operator= : (out this, that) == { }");
    t.add_member( " operator<=> : (this, that) -> std::strong_ordering;");

    //  Provide a 'to_string' function to print enumerator name(s)
    //
    //  The member's source text is accumulated in 'to_string' and
    //  added to the type at the end
    (copy to_string: std::string = " to_string: (this) -> std::string = { \n")
    {
        if bitwise {
            to_string += " _ret : std::string = \"(\";\n";
            to_string += " _comma : std::string = ();\n";
            to_string += " if this == none { return \"(none)\"; }\n";
        }

        for enumerators
        do  (e) {
            if e.name != "_" {  // ignore unnamed values
                if bitwise {
                    //  'none' already has its own early return above
                    if e.name != "none" {
                        to_string += " if (this & (e.name)$) == (e.name)$ { _ret += _comma + \"(e.name)$\"; _comma = \", \"; }\n";
                    }
                }
                else {
                    to_string += " if this == (e.name)$ { return \"(e.name)$\"; }\n";
                }
            }
        }

        if bitwise {
            to_string += " return _ret+\")\";\n}\n";
        }
        else {
            to_string += " return \"invalid (t.name())$ value\";\n}\n";
        }

        t.add_member( to_string );
    }
}


//-----------------------------------------------------------------------
//
//     "An enum[...] is a totally ordered value type that stores a
//      value of its enumerators's type, and otherwise has only public
//      member variables of its enumerator's type, all of which are
//      naturally scoped because they are members of a type."
//
//          -- P0707R4, section 3
//
enum: (inout t: meta::type_declaration) =
{
    //  Let basic_enum do its thing, with an incrementing value generator
    t.basic_enum(
        :(inout value: std::string, specified_value: std::string) = {
            //  An explicit initializer wins; otherwise use previous + 1
            if !specified_value.empty() {
                value = specified_value;
            } else {
                v := std::strtoll(value[0]&, nullptr, 10);
                value = (v + 1) as std::string;
            }
        },
        false   // disable bitwise operations
    );
}


//-----------------------------------------------------------------------
//
//     "flag_enum expresses an enumeration that stores values
//      corresponding to bitwise-or'd enumerators. The enumerators must
//      be powers of two, and are automatically generated [...] A none
//      value is provided [...] Operators | and & are provided to
//      combine and extract values."
+// +// -- P0707R4, section 3 +// +flag_enum: (inout t: meta::type_declaration) = +{ + // Let basic_enum do its thing, with a power-of-two value generator + t.basic_enum( + :(inout value: std::string, specified_value: std::string) = { + if !specified_value.empty() { + value = specified_value; + } else { + v := std::strtoll(value[0]&, nullptr, 10); + if v < 1 { + value = "1"; + } + else { + value = (v * 2) as std::string; + } + } + }, + true // enable bitwise operations + ); +} + + +//----------------------------------------------------------------------- +// +// "As with void*, programmers should know that unions [...] are +// inherently dangerous, should be avoided wherever possible, +// and should be handled with special care when actually needed." +// +// -- Stroustrup (The Design and Evolution of C++, 14.3.4.1) +// +// "C++17 needs a type-safe union... The implications of the +// consensus `variant` design are well understood and have been +// explored over several LEWG discussions, over a thousand emails, +// a joint LEWG/EWG session, and not to mention 12 years of +// experience with Boost and other libraries." +// +// -- Axel Naumann, in P0088 (wg21.link/p0088), +// the adopted proposal for C++17 std::variant +// +//----------------------------------------------------------------------- +// +// union +// +// a type that contains exactly one of a fixed set of values at a time +// + +union: (inout t : meta::type_declaration) += { + alternatives : std::vector<value_member_info> = (); + + // 1. 
Gather: All the user-written members, and find/compute the max size + + (copy value := 0) + for t.get_members() + next value++ + do (m) + if m.is_member_object() + { + m.require( m.is_public() || m.is_default_access(), + "a union alternative cannot be protected or private"); + + m.require( !m.name().starts_with("is_") + && !m.name().starts_with("set_"), + "a union alternative's name cannot start with 'is_' or 'set_' - that could cause user confusion with the 'is_alternative' and 'set_alternative' generated functions"); + + mo := m.as_object(); + mo.require( mo.initializer().empty(), + "a union alternative cannot have an initializer"); + + // Adding local variable 'e' to work around a Clang warning + e: value_member_info = ( mo.name() as std::string, mo.type(), value as std::string ); + alternatives.push_back( e ); + + mo.mark_for_removal_from_enclosing_type(); + } + + discriminator_type: std::string = (); + if alternatives.ssize() < std::numeric_limits<i8>::max() { + discriminator_type = "i8"; + } + else if alternatives.ssize() < std::numeric_limits<i16>::max() { + discriminator_type = "i16"; + } + else if alternatives.ssize() < std::numeric_limits<i32>::max() { + discriminator_type = "i32"; + } + else { + discriminator_type = "i64"; + } + + + // 2. 
Replace: Erase the contents and replace with modified contents + + t.remove_marked_members(); + + // Provide storage + (copy storage: std::string = " _storage: cpp2::aligned_storage<cpp2::max( ") + { + (copy comma: std::string = "") + for alternatives + next comma = ", " + do (e) { + storage += comma + "sizeof((e.type)$)"; + } + + storage += "), cpp2::max( "; + + (copy comma: std::string = "") + for alternatives + next comma = ", " + do (e) { + storage += comma + "alignof((e.type)$)"; + } + + storage += " )> = ();\n"; + t.add_member( storage ); + } + + // Provide discriminator + t.add_member( " _discriminator: (discriminator_type)$ = -1;\n"); + + // Add the alternatives: is_alternative, get_alternative, and set_alternative + for alternatives + do (a) + { + t.add_member( " is_(a.name)$: (this) -> bool = _discriminator == (a.value)$;\n"); + + t.add_member( " (a.name)$: (this) -> forward (a.type)$ pre(is_(a.name)$()) = reinterpret_cast<* const (a.type)$>(_storage&)*;\n"); + + t.add_member( " (a.name)$: (inout this) -> forward (a.type)$ pre(is_(a.name)$()) = reinterpret_cast<*(a.type)$>(_storage&)*;\n"); + + t.add_member( " set_(a.name)$: (inout this, _value: (a.type)$) = { if !is_(a.name)$() { _destroy(); std::construct_at( reinterpret_cast<*(a.type)$>(_storage&), _value); } else { reinterpret_cast<*(a.type)$>(_storage&)* = _value; } _discriminator = (a.value)$; }\n"); + + t.add_member( " set_(a.name)$: (inout this, forward _args...: _) = { if !is_(a.name)$() { _destroy(); std::construct_at( reinterpret_cast<*(a.type)$>(_storage&), _args...); } else { reinterpret_cast<*(a.type)$>(_storage&)* = :(a.type)$ = (_args...); } _discriminator = (a.value)$; }\n"); + } + + // Add destroy + (copy destroy: std::string = " private _destroy: (inout this) = {\n") + { + for alternatives + do (a) { + destroy += " if _discriminator == (a.value)$ { std::destroy_at( reinterpret_cast<*(a.type)$>(_storage&) ); }\n"; + } + + destroy += " _discriminator = -1;\n"; + destroy += " }\n"; + 
t.add_member( destroy ); + } + + // Add the destructor + t.add_member( " operator=: (move this) = { _destroy(); }" ); + + // Add default constructor + t.add_member( " operator=: (out this) = { }" ); + + // Add copy/move construction and assignment + (copy value_set: std::string = "") + { + for alternatives + do (a) { + value_set += " if that.is_(a.name)$() { set_(a.name)$( that.(a.name)$() ); }\n"; + } + value_set += " }\n"; + + t.add_member( std::string(" operator=: (out this, that) = {\n") + + " _storage = ();\n" + + " _discriminator = -1;\n" + + value_set + ); + t.add_member( std::string(" operator=: (inout this, that) = {\n") + + " _storage = _;\n" + + " _discriminator = _;\n" + + value_set + ); + } +} + + +//----------------------------------------------------------------------- +// +// print - output a pretty-printed visualization of t +// +print: (t: meta::type_declaration) = +{ + std::cout << t.print() << "\n"; +} + + +//----------------------------------------------------------------------- +// +// apply_metafunctions +// +apply_metafunctions: ( + inout n : declaration_node, + inout rtype : type_declaration, + error + ) + -> bool += { + assert( n.is_type() ); + + // Check for _names reserved for the metafunction implementation + for rtype.get_members() + do (m) + { + m.require( !m.name().starts_with("_") || m.name().ssize() > 1, + "a type that applies a metafunction cannot have a body that declares a name that starts with '_' - those names are reserved for the metafunction implementation"); + } + + // For each metafunction, apply it + for n.metafunctions + do (meta) + { + // Convert the name and any template arguments to strings + // and record that in rtype + name := meta*.to_string(); + name = name.substr(0, name.find('<')); + + args: std::vector<std::string> = (); + for meta*.template_arguments() + do (arg) + args.push_back( arg.to_string() ); + + rtype.set_metafunction_name( name, args ); + + // Dispatch + // + if name == "interface" { + interface( 
rtype ); + } + else if name == "polymorphic_base" { + polymorphic_base( rtype ); + } + else if name == "ordered" { + ordered( rtype ); + } + else if name == "weakly_ordered" { + weakly_ordered( rtype ); + } + else if name == "partially_ordered" { + partially_ordered( rtype ); + } + else if name == "copyable" { + copyable( rtype ); + } + else if name == "basic_value" { + basic_value( rtype ); + } + else if name == "value" { + value( rtype ); + } + else if name == "weakly_ordered_value" { + weakly_ordered_value( rtype ); + } + else if name == "partially_ordered_value" { + partially_ordered_value( rtype ); + } + else if name == "struct" { + cpp2_struct( rtype ); + } + else if name == "enum" { + cpp2_enum( rtype ); + } + else if name == "flag_enum" { + flag_enum( rtype ); + } + else if name == "union" { + cpp2_union( rtype ); + } + else if name == "print" { + print( rtype ); + } + else { + error( "unrecognized metafunction name: " + name ); + error( "(temporary alpha limitation) currently the supported names are: interface, polymorphic_base, ordered, weakly_ordered, partially_ordered, copyable, basic_value, value, weakly_ordered_value, partially_ordered_value, struct, enum, flag_enum, union, print" ); + return false; + } + + if ( + !args.empty() + && !rtype.arguments_were_used() + ) + { + error( name + " did not use its template arguments - did you mean to write '" + name + " <" + args[0] + "> type' (with the spaces)?"); + return false; + } + } + + return true; +} + + +} + +} diff --git a/CompilerDriver/cc2/source/sema.h b/CompilerDriver/cc2/source/sema.h new file mode 100644 index 0000000..d339659 --- /dev/null +++ b/CompilerDriver/cc2/source/sema.h @@ -0,0 +1,1892 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +//=========================================================================== +// Semantic analysis +//=========================================================================== + +#ifndef CPP2_SEMA_H +#define CPP2_SEMA_H + +#include "reflect.h" + + +namespace cpp2 { + +auto parser::apply_type_metafunctions( declaration_node& n ) + -> bool +{ + assert(n.is_type()); + + // Get the reflection state ready to pass to the function + auto cs = meta::compiler_services{ &errors, generated_tokens }; + auto rtype = meta::type_declaration{ &n, cs }; + + return apply_metafunctions( + n, + rtype, + [&](std::string const& msg) { error( msg, false ); } + ); +} + + +//----------------------------------------------------------------------- +// +// Symbol/scope table +// +//----------------------------------------------------------------------- +// +struct declaration_sym { + bool start = false; + declaration_node const* declaration = {}; + token const* identifier = {}; + statement_node const* initializer = {}; + parameter_declaration_node const* parameter = {}; + bool member = false; + + declaration_sym( + bool s = false, + declaration_node const* decl = {}, + token const* id = {}, + statement_node const* init = {}, + parameter_declaration_node const* param = {}, + bool mem = false + ) + : start{s} + , declaration{decl} + , identifier{id} + , initializer{init} + , parameter{param} + , member{mem} + { } + + auto position() const + -> source_position + { + assert (declaration); + return declaration->position(); + } +}; + +struct identifier_sym { + bool standalone_assignment_to = false; + token const* identifier = {}; + + identifier_sym( + bool a, + token const* id + ) + : standalone_assignment_to{a} + , 
identifier{id} + { } + + auto position() const + -> source_position + { + assert (identifier); + return identifier->position(); + } +}; + +struct selection_sym { + bool start = false; + selection_statement_node const* selection = {}; + + selection_sym( + bool s, + selection_statement_node const* sel + ) + : start{s} + , selection{sel} + { } + + auto position() const + -> source_position + { + assert (selection); + return selection->position(); + } +}; + +struct compound_sym { + bool start = false; + compound_statement_node const* compound = {}; + enum kind { is_scope, is_true, is_false } kind_ = is_scope; + + compound_sym( + bool s, + compound_statement_node const* c, + kind k + ) + : start{s} + , compound{c} + , kind_{k} + { } + + auto position() const + -> source_position + { + assert (compound); + return compound->position(); + } +}; + +struct symbol { + int depth = -1; + + enum active { declaration=0, identifier, selection, compound }; + std::variant < + declaration_sym, + identifier_sym, + selection_sym, + compound_sym + > sym; + + bool start = true; + + symbol(int depth, declaration_sym const& sym) : depth{depth}, sym{sym}, start{sym.start} { } + symbol(int depth, identifier_sym const& sym) : depth{depth}, sym{sym} { } + symbol(int depth, selection_sym const& sym) : depth{depth}, sym{sym}, start{sym.start} { } + symbol(int depth, compound_sym const& sym) : depth{depth}, sym{sym}, start{sym.start} { } + + auto position() const + -> source_position + { + switch (sym.index()) + { + break;case declaration: { + auto const& s = std::get<declaration>(sym); + return s.position(); + } + + break;case identifier: { + auto const& s = std::get<identifier>(sym); + return s.position(); + } + + break;case selection: { + auto const& s = std::get<selection>(sym); + return s.position(); + } + + break;case compound: { + auto const& s = std::get<compound>(sym); + return s.position(); + } + + break;default: + assert (!"illegal symbol state"); + return { 0, 0 }; + } + } +}; + + +// 
Keep a list of all token*'s found that are definite first uses +// of the form "x = expr;" for an uninitialized local variable x, +// which we will rewrite to construct the local variable. +// +std::vector<token const*> definite_initializations; + +auto is_definite_initialization(token const* t) + -> bool +{ + return + std::find( + definite_initializations.begin(), + definite_initializations.end(), + t + ) + != definite_initializations.end(); +} + + +// Keep a list of all token*'s found that are definite last uses +// for a local variable or copy or forward parameter x, which we +// will rewrite to move or forward from the variable. +// +struct last_use { + token const* t; + bool is_forward; + + last_use( + token const* t_, + bool is_forward_ = false + ) + : t{t_} + , is_forward{is_forward_} + { } + + bool operator==(last_use const& that) { return t == that.t; } +}; +std::vector<last_use> definite_last_uses; + +auto is_definite_last_use(token const* t) + -> last_use const* +{ + auto iter = std::find( + definite_last_uses.begin(), + definite_last_uses.end(), + t + ); + if (iter != definite_last_uses.end()) { + return &*iter; + } + else { + return {}; + } +} + + +//----------------------------------------------------------------------- +// +// sema: Semantic analysis +// +//----------------------------------------------------------------------- +// +class sema +{ +public: + std::vector<error_entry>& errors; + std::vector<symbol> symbols; + + std::vector<selection_statement_node const*> active_selections; + +public: + //----------------------------------------------------------------------- + // Constructor + // + // errors error list + // + sema( + std::vector<error_entry>& errors_ + ) + : errors{ errors_ } + { + } + + // Get the declaration of t within the same named function or beyond it + // + auto get_declaration_of( + token const* t, + bool look_beyond_current_function = false + ) + -> declaration_sym const* + { + if (!t) { + return {}; + } + return 
get_declaration_of(*t, look_beyond_current_function); + } + + auto get_declaration_of( + token const& t, + bool look_beyond_current_function = false + ) + -> declaration_sym const* + { + // First find the position the query is coming from + // and remember its depth + auto i = symbols.cbegin(); + while ( + i != symbols.cend() + && i->position() < t.position() + ) + { + ++i; + } + + while ( + i == symbols.cend() + || !i->start + ) + { + if (i == symbols.cbegin()) { + return nullptr; + } + --i; + } + + auto depth = i->depth; + + // Then look backward to find the first declaration of + // this name that is not deeper (in a nested scope) + // and is in the same function + for ( + auto ri = std::make_reverse_iterator(i+1); + ri != symbols.crend() && ri->position() <= t.position(); // TODO: See pure2-deducing-pointers-error.cpp2 + ++ri + ) + { + if ( + ri->sym.index() == symbol::active::declaration + && ri->depth <= depth + ) + { + auto const& decl = std::get<symbol::active::declaration>(ri->sym); + + // Conditionally look beyond the start of the current named (has identifier) function + // (an unnamed function is ok to look beyond) + assert(decl.declaration); + if ( + decl.declaration->type.index() == declaration_node::a_function + && decl.declaration->identifier + && !look_beyond_current_function + ) + { + return nullptr; + } + + // If the name matches, this is it + if ( + decl.identifier + && *decl.identifier == t + ) + { + return &decl; + } + depth = ri->depth; + } + } + + return nullptr; + } + + + //----------------------------------------------------------------------- + // Factor out the uninitialized var decl test + // + auto is_uninitialized_decl(declaration_sym const& sym) + -> bool + { + return + sym.start + && !(sym.identifier && *sym.identifier == "this") + && !sym.initializer + && !(sym.parameter && sym.parameter->pass != passing_style::out) + ; + } + + + auto debug_print(std::ostream& o) + -> void + { + for (auto const& s : symbols) + { + o << std::setw(3) 
<< s.depth << " |"; + o << std::setw(s.depth*2+1) << " "; + + switch (s.sym.index()) { + + break;case symbol::active::declaration: { + auto const& sym = std::get<symbol::active::declaration>(s.sym); + + assert (sym.declaration); + if (sym.declaration->is_function()) { + if (sym.start) { + o << "function "; + } + else { + o << "/function"; + } + } + else if (sym.declaration->is_object()) { + if (sym.start) { + o << "var "; + } + else { + o << "/var "; + } + } + + if (sym.start && sym.identifier) { + o << sym.identifier->to_string(); + } + + if (is_uninitialized_decl(sym)) { + o << " *** UNINITIALIZED"; + } + } + + break;case symbol::active::identifier: { + auto const& sym = std::get<symbol::active::identifier>(s.sym); + assert (sym.identifier); + if (auto use = is_definite_last_use(sym.identifier)) { + o << "*** " << sym.identifier->position().to_string() + << " DEFINITE LAST " + << (use->is_forward ? "FORWARDING" : "POTENTIALLY MOVING") + << " USE OF "; + } + + if (is_definite_initialization(sym.identifier)) { + o << "*** " << sym.identifier->position().to_string() + << " DEFINITE INITIALIZATION OF "; + } + else if (sym.standalone_assignment_to) { + o << "*** assignment to "; + } + else { + o << "*** use of "; + } + o << sym.identifier->to_string(); + } + + break;case symbol::active::selection: { + auto const& sym = std::get<symbol::active::selection>(s.sym); + if (!sym.start) { + o << "/"; + } + o << "selection"; + } + + break;case symbol::active::compound: { + auto const& sym = std::get<symbol::active::compound>(s.sym); + if (!sym.start) { + o << "/"; + --scope_depth; + } + if (sym.kind_ == sym.is_true) { + o << "true branch"; + } + else if (sym.kind_ == sym.is_false) { + o << "false branch"; + } + else { + o << "scope"; + } + + } + + break;default: + o << "ERROR"; + } + + o << "\n"; + } + } + + + //----------------------------------------------------------------------- + // Apply local first- and last-use rules + // + auto apply_local_rules() + -> bool + { + 
auto ret = true; + + //----------------------------------------------------------------------- + // Helpers for readability + + // It's an uninitialized variable (incl. named return values) if it's + // a non-namespace-scope non-parameter object with no initializer + // + auto is_uninitialized_variable_decl = [&](symbol const& s) + -> declaration_sym const* + { + if (auto const* sym = std::get_if<symbol::active::declaration>(&s.sym)) { + assert (sym); + if (is_uninitialized_decl(*sym)) { + if ( + sym->declaration->is_object() + && !sym->declaration->parent_is_namespace() + ) + { + return sym; + } + else { + return {}; + } + } + } + return {}; + }; + + // It's a local (incl. named return value or copy or move or forward parameter) + // + auto is_potentially_movable_local = [&](symbol const& s) + -> declaration_sym const* + { + if (auto const* sym = std::get_if<symbol::active::declaration>(&s.sym)) { + if ( + sym->start + && sym->declaration->is_object() + && (!sym->parameter + || sym->parameter->pass == passing_style::copy + || sym->parameter->pass == passing_style::move + || sym->parameter->pass == passing_style::forward + ) + ) + { + // Must be in function scope + if ( + sym->declaration->parent_declaration + && sym->declaration->parent_declaration->is_function() + ) + { + return sym; + } + else { + return {}; + } + } + } + return {}; + }; + + //----------------------------------------------------------------------- + // Function logic: For each entry in the table... 
+ // + for (auto sympos = std::ssize(symbols) - 1; sympos >= 0; --sympos) + { + // If this is an uninitialized local variable, + // ensure it is definitely initialized and tag those initializations + // + if (auto decl = is_uninitialized_variable_decl(symbols[sympos])) { + assert( + decl->identifier + && !decl->initializer + ); + ret = ret + && ensure_definitely_initialized(decl, sympos+1, symbols[sympos].depth) + ; + } + + // If this is a copy, move, or forward parameter or a local variable, + // identify and tag its definite last uses to `std::move` from them + // + if (auto decl = is_potentially_movable_local(symbols[sympos])) { + assert (decl->identifier); + find_definite_last_uses( + decl->identifier, + sympos, + decl->parameter && decl->parameter->pass == passing_style::forward + ); + } + } + + return ret; + } + +private: + // Find the definite last uses for local variable *id starting at the + // given position and depth in the symbol/scope table + // + auto find_definite_last_uses( + token const* id, + int pos, + bool is_forward + ) const + -> void + { + auto i = pos; + auto depth = symbols[pos].depth; + + // Maintain a stack of the depths of the most recently seen + // selection statements, using the current depth-2 as a sentinel + auto selections = std::vector<int>{depth-2}; + + // Scan forward to the end of this scope, keeping track of + // the trailing nest of selection statements + while ( + i+1 < std::ssize(symbols) + && symbols[i+1].depth >= depth + ) + { + assert (std::ssize(symbols) > 1); + if (symbols[i].sym.index() == symbol::selection) { + auto const& s = std::get<symbol::selection>(symbols[i].sym); + if (s.start) { + selections.push_back(symbols[i].depth); + } + //else { + // assert (symbols[i].depth-1 == selections.back()); + // selections.pop_back(); + //} + } + ++i; + } + + // i is now at the end of id's scope, so start scanning backwards + // until we find the first definite last use + for (auto found = false; i > pos; --i) + { + // Once we 
find something, don't continue back further + // than the closest enclosing selection statement + if ( + found + && symbols[i].depth <= selections.back() + ) + { + break; + } + + if (symbols[i].sym.index() == symbol::active::identifier) + { + auto const& sym = std::get<symbol::active::identifier>(symbols[i].sym); + assert (sym.identifier); + + // If we find a use of this identifier + if (*sym.identifier == *id) + { + if ( + !found + || symbols[i].depth > selections.back()+1 + ) + { + definite_last_uses.emplace_back( sym.identifier, is_forward ); + found = true; + } + + // Pop any of the last branches that we're outside of + while (symbols[i].depth <= selections.back()) { + selections.pop_back(); + assert (!selections.empty()); // won't remove sentinel + } + // Then skip over the earlier part of the current branch + while ( + i > pos + && symbols[i].depth > selections.back() + 1 + ) + { + --i; + } + } + } + } + + // If we arrived back at the declaration without finding a use + // and this isn't generated code (ignore that for now) + // and this is a user-named object (not 'this', 'that', or '_') + if ( + i == pos + && id->position().lineno > 0 + && *id != "this" + && *id != "that" + && *id != "_" + ) + { + errors.emplace_back( + id->position(), + "local variable " + id->to_string() + " is not used; consider changing its name to '_' to make it explicitly anonymous, or removing it entirely if its side effects are not needed" + ); + } + } + + + // Check that local variable *id is initialized before use on all paths + // starting at the given position and depth in the symbol/scope table + // + // TODO: After writing the first version of this, I realized that it could be + // simplified a lot by using a sentinel value to represent the base case like + // the others instead of as a special case. 
It's tempting to rewrite this now + // to do that cleanup, but the code is working and fully localized, so + // rewriting it wouldn't give any benefit, and I need to resist the urge to + // be distracted by goldplating when I could be implementing a new feature. + // + auto ensure_definitely_initialized( + declaration_sym const* decl, + int pos, + int depth + ) const + -> bool + { + // If this is a member variable in a constructor, the name doesn't + // appear lexically right in the constructor, so prepending "this." + // to the printed name might make the error more readable to the programmer + auto name = decl->identifier->to_string(); + if (decl->declaration->parent_is_type()) { + name += " (aka this." + name + ")"; + } + + struct stack_entry{ + int pos; // start of this selection statement + + struct branch { + int start; + bool result = false; + + branch(int s, bool r) : start{s}, result{r} { } + }; + std::vector<branch> branches; + + stack_entry(int p) : pos{p} { } + + auto debug_print(std::ostream& o) const -> void + { + o << "Stack entry: " << pos << "\n"; + for (auto const& e : branches) { + o << " ( " << e.start << " , " << e.result << " )\n"; + } + } + }; + std::vector<stack_entry> selection_stack; + + for ( + ; + pos < std::ssize(symbols) && symbols[pos].depth >= depth; + ++pos + ) + { + switch (symbols[pos].sym.index()) { + + break;case symbol::active::declaration: { + auto const& sym = std::get<symbol::active::declaration>(symbols[pos].sym); + if ( + sym.start + && sym.identifier + && *sym.identifier == *decl->identifier + ) + { + errors.emplace_back( + sym.identifier->position(), + "local variable " + sym.identifier->to_string() + + " cannot have the same name as an uninitialized" + " variable in the same function"); + } + } + + break;case symbol::active::identifier: { + auto const& sym = std::get<symbol::active::identifier>(symbols[pos].sym); + assert (sym.identifier); + + if (is_definite_initialization(sym.identifier)) { + errors.emplace_back( + 
sym.identifier->position(), + "local variable " + name + + " must be initialized before " + sym.identifier->to_string() + + " (local variables must be initialized in the order they are declared)" + ); + return false; + } + + // If we find a use of this identifier + if (*sym.identifier == *decl->identifier) { + + // If we're not inside a selection statement, we're at the top level -- + // just return true if it's an assignment to it, else return false + if (std::ssize(selection_stack) == 0) { + if (sym.standalone_assignment_to) { + definite_initializations.push_back( sym.identifier ); + } + else { + errors.emplace_back( + sym.identifier->position(), + "local variable " + name + + " is used before it was initialized"); + } + return sym.standalone_assignment_to; + } + + // Else if we're inside a selection statement but still in the condition + // portion (there are no branches entered yet) + else if (std::ssize(selection_stack.back().branches) == 0) { + // If this is a top-level selection statement, handle it the same as + // if we weren't an a selection statement + if (std::ssize(selection_stack) == 1) { + if (sym.standalone_assignment_to) { + definite_initializations.push_back( sym.identifier ); + } + else { + errors.emplace_back( + sym.identifier->position(), + "local variable " + name + + " is used in a condition before it was initialized"); + } + return sym.standalone_assignment_to; + } + // Else we can skip the rest of this selection statement, and record + // this as the result of the next outer selection statement's current branch + else { + selection_stack.pop_back(); + assert (std::ssize(selection_stack.back().branches) > 0); + selection_stack.back().branches.back().result = sym.standalone_assignment_to; + + int this_depth = symbols[pos].depth; + while (symbols[pos + 1].depth >= this_depth) { + ++pos; + } + } + } + + // Else we're in a selection branch and can skip the rest of this branch + // and record this as the result for the current branch + else { + 
if (sym.standalone_assignment_to) { + definite_initializations.push_back( sym.identifier ); + } + else { + errors.emplace_back( + sym.identifier->position(), + "local variable " + name + + " is used in a branch before it was initialized"); + } + selection_stack.back().branches.back().result = sym.standalone_assignment_to; + + // The depth of this branch should always be the depth of + // the current selection statement + 1 + int branch_depth = symbols[selection_stack.back().pos].depth + 1; + while (symbols[pos + 1].depth > branch_depth) { + ++pos; + } + } + + } + } + + break;case symbol::active::selection: { + auto const& sym = std::get<symbol::active::selection>(symbols[pos].sym); + + // If we're starting a new selection statement, add a stack entry for it + if (sym.start) { + selection_stack.emplace_back( pos ); + } + + // If we're ending a selection statement, look at the partial results -- + // they must all be false or all true, if they're a mix we are missing + // initializations on some path(s) + else { + assert (std::ssize(selection_stack) > 0); + + auto true_branches = std::string{}; + auto false_branches = std::string{}; + for (auto const& b : selection_stack.back().branches) + { + // If this is not an implicit 'else' branch (i.e., if lineno > 0) + if (symbols[b.start].position().lineno > 0) { + (b.result ? true_branches : false_branches) + += "\n branch starting at line " + + std::to_string(symbols[b.start].position().lineno); + } + else { + (b.result ? 
true_branches : false_branches) + += "\n implicit else branch"; + } + } + + // If none of the branches was true + if (true_branches.length() == 0) + { + selection_stack.pop_back(); + // Nothing else to do, just continue + } + + // Else if all of the branches were true + else if (false_branches.length() == 0) + { + // If this is a top-level selection statement, handle it the same as + // if we weren't an a selection statement + if (std::ssize(selection_stack) == 1) { + return true; + } + // Else pop this selection statement, and record this as the result + // of the next outer selection statement's current branch + else { + selection_stack.pop_back(); + assert (std::ssize(selection_stack.back().branches) > 0); + selection_stack.back().branches.back().result = true; + + // And skip the rest of this branch + auto skip_depth = symbols[pos].depth - 1; + while (symbols[pos + 1].depth >= skip_depth) { + ++pos; + } + } + } + + // Else we found a missing initializion, report it and return false + else + { + errors.emplace_back( + decl->identifier->position(), + "local variable " + name + + " must be initialized on both branches or neither branch"); + + assert (symbols[selection_stack.back().pos].sym.index() == symbol::active::selection); + auto const& sym = std::get<symbol::active::selection>(symbols[pos].sym); + errors.emplace_back( + sym.selection->identifier->position(), + "\"" + sym.selection->identifier->to_string() + + "\" initializes " + name + + " on:" + true_branches + + "\nbut not on:" + false_branches + ); + + return false; + } + + } + } + + break;case symbol::active::compound: { + auto const& sym = std::get<symbol::active::compound>(symbols[pos].sym); + + // If we're in a selection + if (std::ssize(selection_stack) > 0) { + // If this is a compound start with the current selection's depth + // plus one, it's the start of one of the branches of that selection + if ( + sym.start + && symbols[pos].depth == symbols[selection_stack.back().pos].depth+1 + ) + { + 
selection_stack.back().branches.emplace_back( pos, false ); + } + } + } + + break;default: + assert (!"illegal symbol"); + } + + } + + errors.emplace_back( + decl->identifier->position(), + name + + " - variable must be initialized on every branch path"); + return false; + } + + +public: + //----------------------------------------------------------------------- + // Per-node sema rules + // + + auto check(qualified_id_node const& n) + { + // Check for some incorrect uses of . + if (auto decl = get_declaration_of(n.get_first_token(), true); + decl && std::ssize(n.ids) > 1 + ) + { + assert (decl->declaration); + + if ( + decl->declaration->is_object() + && n.ids[1].scope_op + && n.ids[1].scope_op->type() == lexeme::Scope + ) + { + errors.emplace_back( + n.position(), + "use '" + decl->identifier->to_string() + ".' to refer to an object member" + ); + return false; + } + } + + return true; + } + + + auto check(postfix_expression_node const& n) + { + // Check for some incorrect uses of :: or . 
+ if (auto decl = get_declaration_of(n.get_first_token_ignoring_this(), true); + decl && !n.ops.empty() + ) + { + assert (decl->declaration); + + if ( + decl->declaration->is_type() + && n.ops[0].op + && n.ops[0].op->type() == lexeme::Dot + ) + { + errors.emplace_back( + n.position(), + "use '" + decl->identifier->to_string() + "::' to refer to a type member" + ); + return false; + } + } + + return true; + } + + + auto check(parameter_declaration_node const& n) + -> bool + { + auto type_name = std::string{}; + if (n.declaration->has_declared_return_type()) { + type_name = n.declaration->get_object_type()->to_string(); + } + + if ( + n.ordinal == 2 + && !n.has_name("that") + && n.declaration->parent_declaration + && n.declaration->parent_declaration->has_name("operator=") + && n.declaration->parent_declaration->parent_declaration + && n.declaration->parent_declaration->parent_declaration->name() + && type_name == *n.declaration->parent_declaration->parent_declaration->name() + ) + { + errors.emplace_back( + n.position(), + "if an 'operator=' second parameter is of the same type (here '" + type_name + "'), it must be named 'that'" + ); + return false; + } + + return true; + } + + auto check(declaration_node const& n) + -> bool + { + // An object of deduced type must have an initializer + if ( + n.is_object() + && n.has_wildcard_type() + && !n.has_initializer() + ) + { + errors.emplace_back( + n.position(), + "an object with a deduced type must have an = initializer" + ); + return false; + } + + // An object initializer must be an expression + if ( + n.is_object() + && n.initializer + && !n.initializer->is_expression() + ) + { + errors.emplace_back( + n.position(), + "an object initializer must be an expression" + ); + return false; + } + + // A namespace must be initialized with a compound expression + if ( + n.is_namespace() + && ( + !n.initializer + || !n.initializer->is_compound() + ) + ) + { + errors.emplace_back( + n.position(), + "a namespace must be = 
initialized with a { } body containing declarations" + ); + return false; + } + + // A function body must be an expression-statement or a compound-statement + if ( + n.is_function() + && n.initializer + && n.initializer->is_return() + ) + { + errors.emplace_back( + n.position(), + "a function with a single-expression body doesn't need to say 'return' - either omit 'return' or write a full { }-enclosed function body" + ); + return false; + } + + // A nonvirtual and nondefaultable function must have an initializer + if ( + n.is_function() + && !n.is_virtual_function() + && !n.is_defaultable_function() + && !n.has_initializer() + ) + { + errors.emplace_back( + n.position(), + "a function must have a body ('=' initializer), unless it is virtual (has a 'virtual this' parameter) or is defaultable (operator== or operator<=>)" + ); + return false; + } + + if ( + n.is_type() + && !n.parent_is_namespace() + && !n.parent_is_type() + ) + { + errors.emplace_back( + n.position(), + "(temporary alpha limitation) a type must be in a namespace or type scope - function-local types are not yet supported" + ); + return false; + } + + // A type scope variable must have a declared type + if ( + n.parent_is_type() + && n.has_wildcard_type() + ) + { + errors.emplace_back( + n.position(), + "a type scope variable must have a declared type" + ); + return false; + } + + // A 'this' declaration must be an ordinary parameter or a type-scope object + if (n.identifier && *n.identifier->identifier == "this") + { + if ( + n.is_template_parameter + || ( + !n.is_parameter + && !n.parent_is_type() + ) + ) + { + errors.emplace_back( + n.identifier->position(), + "'this' may only be declared as an ordinary function parameter or type-scope (base) object" + ); + return {}; + } + } + + { + auto this_index = n.index_of_parameter_named("this"); + auto that_index = n.index_of_parameter_named("that"); + + if (this_index >= 0) { + if (!n.parent_is_type()) { + errors.emplace_back( + n.position(), + "'this' must 
be the first parameter of a type-scope function" + ); + return false; + } + if (this_index != 0) { + errors.emplace_back( + n.position(), + "'this' must be the first parameter" + ); + return false; + } + } + + if (that_index >= 0) { + if (!n.parent_is_type()) { + errors.emplace_back( + n.position(), + "'that' must be the second parameter of a type-scope function" + ); + return false; + } + if (that_index != 1) { + errors.emplace_back( + n.position(), + "'that' must be the second parameter" + ); + return false; + } + } + } + + if ( + n.is_object() + && n.has_wildcard_type() + && n.parent_is_namespace() + ) + { + errors.emplace_back( + n.identifier->position(), + "namespace scope objects must have a concrete type, not a deduced type" + ); + return false; + } + + if ( + n.has_name("_") + && !n.is_object() + && !n.is_namespace() + && !n.is_object_alias() + ) + { + errors.emplace_back( + n.identifier->position(), + "'_' (wildcard) may not be the name of a function or type - it may only be used as the name of an anonymous object, object alias, or namespace" + ); + return false; + } + + if ( + n.has_name("this") + && n.parent_is_type() + ) + { + if (!n.is_object()) { + errors.emplace_back( + n.position(), + "a member named 'this' declares a base subobject, and must be followed by a base type name" + ); + return false; + } + + if ( + !n.is_public() + && !n.is_default_access() + ) + { + errors.emplace_back( + n.position(), + "a base type must be public (the default)" + ); + return false; + } + + if (n.has_wildcard_type()) + { + errors.emplace_back( + n.position(), + "a base type must be a specific type, not a deduced type (omitted or '_'-wildcarded)" + ); + return false; + } + } + + if ( + n.access != accessibility::default_ + && !n.parent_is_type() + ) + { + errors.emplace_back( + n.position(), + "an access-specifier is only allowed on a type-scope (member) declaration" + ); + return false; + } + + if (n.is_constructor()) + { + auto& func = 
std::get<declaration_node::a_function>(n.type); + assert( + func->parameters->ssize() > 0 + && (*func->parameters)[0]->has_name("this") + ); + if ((*func->parameters)[0]->is_polymorphic()) { + errors.emplace_back( + n.position(), + "a constructor may not be declared virtual, override, or final" + ); + return false; + } + } + + if ( + n.is_function() + && n.has_name() + && n.parent_is_function() + ) + { + assert (n.identifier->get_token()); + auto name = n.identifier->get_token()->to_string(); + errors.emplace_back( + n.position(), + "(temporary alpha limitation) local functions like '" + name + ": (/*params*/) = {/*body*/}' are not currently supported - write a local variable initialized with an unnamed function like '" + name + " := :(/*params*/) = {/*body*/};' instead (add '=' and ';')" + ); + return false; + } + + // Ban overloading operators &&, ||, and , (comma) + if ( + n.identifier + && n.is_function() + && ( + n.has_name("operator&&") + || n.has_name("operator||") + || (n.has_name("operator&") && n.parameter_count() < 2) + || n.has_name("operator,") + ) + ) + { + errors.emplace_back( + n.position(), + "overloading '" + n.name()->to_string() + "' is not allowed" + ); + return false; + } + + // Require that ~/comparison/assignment operators must be members + if ( + n.identifier + && !n.is_function_with_this() + && ( + // Note re comparisons: The reason I'm restricting comparisons to be members + // is because with comparison symmetry (since C++20, derived from Cpp2) + // there's no longer a need for a type author to write them as nonmembers, + // and I want to discourage that habit by banning nonmembers. 
However, there + // could be a motivation to write them as nonmembers in the case where the + // type author doesn't provide them -- if that turns out to be important we + // can remove the restriction on nonmember comparisons here + n.is_comparison() + + // The following would be rejected anyway by the Cpp1 compiler, + // but including them here gives nicer and earlier error messages + || n.has_name("operator~") + || n.is_compound_assignment() + ) + ) + { + errors.emplace_back( + n.position(), + n.name()->to_string() + " must have 'this' as the first parameter" + ); + return false; + } + + // If this is the main function, it must be 'main: ()' or 'main: (args)' + if ( + n.identifier + && n.has_name("main") + && n.is_function() + && n.is_global() + ) + { + auto& func = std::get<declaration_node::a_function>(n.type); + auto& params = func->parameters->parameters; + + // It's more readable to express this as positive condition here... + if ( + // There are no parameters + params.empty() + // Or there's a single wildcard in-param named 'args' + || ( + params.size() == 1 + && params[0]->has_name("args") + && params[0]->pass == passing_style::in + && params[0]->declaration->is_object() + && std::get<declaration_node::an_object>(params[0]->declaration->type)->is_wildcard() + ) + ) + { + ; // ok + } + // ... 
and if it isn't that, then complain + else + { + errors.emplace_back( + params[0]->position(), + "'main' must be declared as 'main: ()' with zero parameters, or 'main: (args)' with one parameter named 'args' for which the type 'std::vector<std::string_view>' will be deduced" + ); + return false; + } + } + + if (n.has_name("operator=")) + { + if (!n.is_function()) + { + errors.emplace_back( + n.position(), + "'operator=' must be a function" + ); + return false; + } + auto& func = std::get<declaration_node::a_function>(n.type); + + if (func->has_declared_return_type()) + { + errors.emplace_back( + func->parameters->parameters[0]->position(), + "'operator=' may not have a declared return type" + ); + return false; + } + + if (func->parameters->ssize() == 0) + { + errors.emplace_back( + n.position(), + "an operator= function must have a parameter" + ); + return false; + } + else if ( + (*func->parameters)[0]->has_name("this") + && (*func->parameters)[0]->pass != passing_style::inout + && (*func->parameters)[0]->pass != passing_style::out + && (*func->parameters)[0]->pass != passing_style::move + ) + { + errors.emplace_back( + n.position(), + "an operator= function's 'this' parameter must be inout, out, or move" + ); + return false; + } + + if ( + func->parameters->ssize() > 1 + && (*func->parameters)[1]->has_name("that") + && (*func->parameters)[1]->pass != passing_style::in + && (*func->parameters)[1]->pass != passing_style::move + ) + { + errors.emplace_back( + n.position(), + "an operator= function's 'that' parameter must be in or move" + ); + return false; + } + + if ( + func->parameters->ssize() > 1 + && (*func->parameters)[0]->has_name("this") + && (*func->parameters)[0]->pass == passing_style::move + ) + { + errors.emplace_back( + n.position(), + "a destructor may not have other parameters besides 'this'" + ); + return false; + } + } + + for (auto& decl : n.get_type_scope_declarations()) + { + if (decl->has_name("that")) + { + errors.emplace_back( + 
n.position(), + "'that' may not be used as a type scope name" + ); + return false; + } + } + + if ( + n.is_binary_comparison_function() + && !n.has_bool_return_type() + ) + { + errors.emplace_back( + n.position(), + n.name()->to_string() + " must return bool" + ); + return false; + } + + if (n.has_name("operator<=>")) { + auto return_name = n.unnamed_return_type_to_string(); + if ( + return_name != "_" + && return_name.find("strong_ordering" ) == return_name.npos + && return_name.find("weak_ordering" ) == return_name.npos + && return_name.find("partial_ordering") == return_name.npos + ) + { + errors.emplace_back( + n.position(), + "operator<=> must return std::strong_ordering, std::weak_ordering, or std::partial_ordering" + ); + return false; + } + } + + if (n.is_type()) { + auto compound_stmt = n.initializer->get_if<compound_statement_node>(); + assert (compound_stmt); + for (auto& stmt : compound_stmt->statements) { + if ( + !stmt->is_declaration() + && !stmt->is_using() + ) + { + errors.emplace_back( + stmt->position(), + "a user-defined type body must contain only declarations or 'using' statements, not other code" + ); + return false; + } + } + } + + return true; + } + + + auto check(function_type_node const& n) + -> bool + { + assert(n.parameters); + + // An increment/decrement function must have a single 'inout' parameter, + // and if it's a member flag it if we know the type is not copyable + if ( + n.my_decl->has_name("operator++") + || n.my_decl->has_name("operator--") + ) + { + if ( + (*n.parameters).ssize() != 1 + || (*n.parameters)[0]->direction() != passing_style::inout + ) + { + errors.emplace_back( + n.position(), + "a user-defined " + n.my_decl->name()->to_string() + " must have a single 'inout' parameter" + ); + return false; + } + + if (n.has_deduced_return_type()) { + errors.emplace_back( + n.position(), + "a user-defined " + n.my_decl->name()->to_string() + " must have a specific (not deduced) return type" + ); + return false; + } + + if ( + 
n.my_decl->parent_declaration + && n.my_decl->parent_declaration->cannot_be_a_copy_constructible_type() + ) + { + errors.emplace_back( + n.position(), + "a user-defined " + n.my_decl->name()->to_string() + " in type scope must be a member of a copyable type" + ); + return false; + } + } + + return true; + } + + + auto check(statement_node const& n) + -> bool + { + if (auto expr_stmt = n.get_if<expression_statement_node>(); + expr_stmt + && n.compound_parent + && ( + expr_stmt->expr->is_identifier() + || expr_stmt->expr->is_id_expression() + || expr_stmt->expr->is_literal() + ) + ) + { + errors.emplace_back( + n.position(), + "unused literal or identifier" + ); + return false; + } + + return true; + } + + + //----------------------------------------------------------------------- + // Visitor functions + // + int scope_depth = 0; + bool started_standalone_assignment_expression = false; + bool started_postfix_expression = false; + bool is_out_expression = false; + bool inside_next_expression = false; + bool inside_parameter_list = false; + bool inside_parameter_identifier = false; + bool inside_returns_list = false; + bool just_entered_for = false; + parameter_declaration_node const* inside_out_parameter = {}; + + auto start(next_expression_tag const&, int) -> void + { + inside_next_expression = true; + } + + auto end(next_expression_tag const&, int) -> void + { + inside_next_expression = false; + } + + auto start(parameter_declaration_list_node const&, int) -> void + { + inside_parameter_list = true; + } + + auto end(parameter_declaration_list_node const&, int) -> void + { + inside_parameter_list = false; + } + + auto start(declaration_identifier_tag const&, int) -> void + { + inside_parameter_identifier = inside_parameter_list; + } + + auto end(declaration_identifier_tag const&, int) -> void + { + inside_parameter_identifier = false; + } + + auto start(parameter_declaration_node const& n, int) -> void + { + if ( + // If it's an 'out' parameter + ( + 
!inside_returns_list + && n.pass == passing_style::out + ) + // Or it's an uninitialized 'out' return value + || ( + inside_returns_list + && n.pass == passing_style::out + && !n.declaration->initializer + ) + ) + { + inside_out_parameter = &n; + } + + if ( + n.pass == passing_style::copy + || n.pass == passing_style::move + || n.pass == passing_style::forward + ) + { + // Handle variables in unnamed functions. For such cases scope_depth is increased by +1 + auto depth = scope_depth + ((n.declaration->parent_is_function() && n.declaration->parent_declaration->name() == nullptr) ? 1 : 0 ); + symbols.emplace_back( depth, declaration_sym( true, n.declaration.get(), n.declaration->name(), n.declaration->initializer.get(), &n)); + } + } + + auto end(parameter_declaration_node const&, int) -> void + { + inside_out_parameter = {}; + } + + auto start(expression_list_node::term const&n, int) -> void + { + is_out_expression = (n.pass == passing_style::out); + } + + auto start(function_returns_tag const&, int) -> void + { + inside_returns_list = true; + } + + auto end(function_returns_tag const&, int) -> void + { + inside_returns_list = false; + } + + auto start(loop_body_tag const &n, int) -> void + { + if (*n.identifier == "for") { + just_entered_for = true; + } + } + + auto start(declaration_node const& n, int) -> void + { + // Skip the first declaration after entering a 'for', + // which is the for loop parameter - it's always + // guaranteed to be initialized by the language + if (just_entered_for) { + just_entered_for = false; + return; + } + + if ( + !n.is_alias() + // Skip type scope (member) variables + && !(n.parent_is_type() && n.is_object()) + // Skip unnamed variables + && n.identifier + // Skip non-out parameters + && ( + !inside_parameter_list + || inside_out_parameter + ) + ) + { + symbols.emplace_back( scope_depth, declaration_sym( true, &n, n.name(), n.initializer.get(), inside_out_parameter ) ); + if (!n.is_object()) { + ++scope_depth; + } + } + } + + auto 
end(declaration_node const& n, int) -> void + { + if ( + !n.is_alias() + // Skip type scope (member) variables + && !(n.parent_is_type() && n.is_object()) + // Skip unnamed variables + && n.identifier + // Skip non-out parameters + && ( + !inside_parameter_list + || inside_out_parameter + ) + ) + { + symbols.emplace_back( scope_depth, declaration_sym( false, &n, nullptr, nullptr, inside_out_parameter ) ); + if (!n.is_object()) { + --scope_depth; + } + } + } + + auto start(token const& t, int) -> void + { + // We currently only care to look at identifiers + if (t.type() != lexeme::Identifier) { + return; + } + + // If this is the first identifier since we started a new assignment, + // expression, then it's the left-hand side (target) of the assignment + else if (started_standalone_assignment_expression) + { + symbols.emplace_back( scope_depth, identifier_sym( true, &t ) ); + started_standalone_assignment_expression = false; // we were the consumer for this information + } + + // If this is the first identifier since we saw an `out` expression, + // then it's the argument of the `out` expression + // TODO: for now we just take the first identifier, and we should make + // this an id-expression and add a sema rule to disallow complex expressions + else if (is_out_expression) + { + symbols.emplace_back( scope_depth, identifier_sym( true, &t ) ); + is_out_expression = false; + } + + // Otherwise it's just an identifier use (if it's not a parameter name) and + // it's the first identifier of a postfix_expressions (not a member name or something else) + else if (started_postfix_expression) + { + started_postfix_expression = false; + if (!inside_parameter_identifier && !inside_next_expression) + { + // Put this into the table if it's a use of an object in scope + // or it's a 'copy' parameter (but to be a use it must be after + // the declaration, not the token in the decl's name itself) + if (auto decl = get_declaration_of(t); + decl + && decl->declaration->name() != &t 
+ ) + { + symbols.emplace_back( scope_depth, identifier_sym( false, &t ) ); + } + } + } + } + + auto start(selection_statement_node const& n, int) -> void + { + active_selections.push_back( &n ); + symbols.emplace_back( scope_depth, selection_sym{ true, active_selections.back() } ); + ++scope_depth; + } + + auto end(selection_statement_node const&, int) -> void + { + symbols.emplace_back( scope_depth, selection_sym{ false, active_selections.back() } ); + active_selections.pop_back(); + --scope_depth; + } + + auto kind_of(compound_statement_node const& n) + -> compound_sym::kind + { + auto kind = compound_sym::is_scope; + if (!active_selections.empty()) + { + assert(active_selections.back()->true_branch); + if (active_selections.back()->true_branch.get() == &n) + { + kind = compound_sym::is_true; + } + if ( + active_selections.back()->false_branch + && active_selections.back()->false_branch.get() == &n + ) + { + kind = compound_sym::is_false; + } + } + return kind; + } + + auto start(compound_statement_node const& n, int) -> void + { + symbols.emplace_back( + scope_depth, + compound_sym{ true, &n, kind_of(n) } + ); + ++scope_depth; + } + + auto end(compound_statement_node const& n, int) -> void + { + symbols.emplace_back( + scope_depth, + compound_sym{ false, &n, kind_of(n) } + ); + --scope_depth; + } + + auto start(assignment_expression_node const& n, int) + { + if ( + n.is_standalone_expression() + && n.lhs_is_id_expression() + && std::ssize(n.terms) > 0 + ) + { + assert (n.terms.front().op); + if (n.terms.front().op->type() == lexeme::Assignment) { + started_standalone_assignment_expression = true; + } + } + } + + auto start(postfix_expression_node const&, int) { + started_postfix_expression = true; + } + + auto start(auto const&, int) -> void + { + // Ignore other node types + } + + auto end(auto const&, int) -> void + { + // Ignore other node types + } +}; + + +} + +#endif diff --git a/CompilerDriver/cc2/source/to_cpp1.h b/CompilerDriver/cc2/source/to_cpp1.h 
new file mode 100644 index 0000000..a7b6782 --- /dev/null +++ b/CompilerDriver/cc2/source/to_cpp1.h @@ -0,0 +1,6750 @@ + +// Copyright (c) Herb Sutter +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + + +//=========================================================================== +// Lowering to Cpp1 syntax +//=========================================================================== + +#ifndef CPP2_TO_CPP1_H +#define CPP2_TO_CPP1_H + +#include "sema.h" +#include <iostream> +#include <cstdio> +#include <optional> + +namespace cpp2 { + +// Defined out of line here just to avoid bringing <iostream> in before this, +// so that we can't accidentally start depending on iostreams in earlier phases +auto cmdline_processor::print(std::string_view s, int width) + -> void +{ + if (width > 0) { + std::cout << std::setw(width) << std::left; + } + std::cout << s; +} + + +//----------------------------------------------------------------------- +// +// Stringingizing helpers +// +//----------------------------------------------------------------------- + +auto pad(int padding) + -> std::string_view +{ + if (padding < 1) { + return ""; + } + + return { + indent_str.c_str(), + _as<size_t>( std::min( padding, _as<int>(std::ssize(indent_str))) ) + }; +} + + +//----------------------------------------------------------------------- +// +// positional_printer: a Syntax 1 pretty printer +// +//----------------------------------------------------------------------- +// +static auto flag_emit_cppfront_info 
= false; +static cmdline_processor::register_flag cmd_emit_cppfront_info( + 9, + "emit-cppfront-info", + "Emit cppfront version/build in output file", + []{ flag_emit_cppfront_info = true; } +); + +static auto flag_clean_cpp1 = false; +static cmdline_processor::register_flag cmd_clean_cpp1( + 9, + "clean-cpp1", + "Emit clean Cpp1 without #line directives", + []{ flag_clean_cpp1 = true; } +); + +static auto flag_import_std = false; +static cmdline_processor::register_flag cmd_import_std( + 0, + "import-std", + "import all std:: via 'import std;' - ignored if -include-std is set", + []{ flag_import_std = true; } +); + +static auto flag_include_std = false; +static cmdline_processor::register_flag cmd_include_std( + 0, + "include-std", + "#include all std:: headers", + []{ flag_include_std = true; } +); + +static auto flag_cpp2_only = false; +static cmdline_processor::register_flag cmd_cpp2_only( + 0, + "pure-cpp2", + "Allow Cpp2 syntax only - also sets -import-std", + []{ flag_cpp2_only = true; flag_import_std = true; } +); + +static auto flag_safe_null_pointers = true; +static cmdline_processor::register_flag cmd_safe_null_pointers( + 2, + "no-null-checks", + "Disable null safety checks", + []{ flag_safe_null_pointers = false; } +); + +static auto flag_safe_subscripts = true; +static cmdline_processor::register_flag cmd_safe_subscripts( + 2, + "no-subscript-checks", + "Disable subscript safety checks", + []{ flag_safe_subscripts = false; } +); + +static auto flag_safe_comparisons = true; +static cmdline_processor::register_flag cmd_safe_comparisons( + 2, + "no-comparison-checks", + "Disable mixed-sign comparison safety checks", + []{ flag_safe_comparisons = false; } +); + +static auto flag_use_source_location = false; +static cmdline_processor::register_flag cmd_enable_source_info( + 2, + "add-source-info", + "Enable source_location information for contract checks", + []{ flag_use_source_location = true; } +); + +static auto flag_cpp1_filename = std::string{}; 
+static cmdline_processor::register_flag cmd_cpp1_filename( + 9, + "output filename", + "Output to 'filename' (can be 'stdout') - default is *.cpp/*.h", + nullptr, + [](std::string const& name) { flag_cpp1_filename = name; } +); + +static auto flag_print_colon_errors = false; +static cmdline_processor::register_flag cmd_print_colon_errors( + 9, + "format-colon-errors", + "Emit ':line:col:' format for messages - lights up some tools", + []{ flag_print_colon_errors = true; } +); + +static auto flag_verbose = false; +static cmdline_processor::register_flag cmd_verbose( + 9, + "verbose", + "Print verbose statistics and -debug output", + []{ flag_verbose = true; } +); + +static auto flag_no_exceptions = false; +static cmdline_processor::register_flag cmd_no_exceptions( + 4, + "fno-exceptions", + "Disable C++ EH - failed 'as' for 'variant' will assert", + []{ flag_no_exceptions = true; } +); + +static auto flag_no_rtti = false; +static cmdline_processor::register_flag cmd_no_rtti( + 4, + "fno-rtti", + "Disable C++ RTTI - using 'as' for '*'/'std::any' will assert", + []{ flag_no_rtti = true; } +); + +struct text_with_pos{ + std::string text; + source_position pos; + text_with_pos(std::string const& t, source_position p) : text{t}, pos{p} { } +}; + +// Defined out of line so we can use flag_print_colon_errors. 
+auto error_entry::print( + auto& o, + std::string const& file +) const + -> void +{ + o << file ; + if (where.lineno > 0) { + if (flag_print_colon_errors) { + o << ":" << (where.lineno); + if (where.colno >= 0) { + o << ":" << where.colno; + } + } + else { + o << "("<< (where.lineno); + if (where.colno >= 0) { + o << "," << where.colno; + } + o << ")"; + } + } + o << ":"; + if (internal) { + o << " internal compiler"; + } + o << " error: " << msg << "\n"; +} + +class positional_printer +{ + // Core information + std::ofstream out_file = {}; // Cpp1 syntax output file + std::ostream* out = {}; // will point to out_file or cout + std::string cpp2_filename = {}; + std::string cpp1_filename = {}; + std::vector<comment> const* pcomments = {}; // Cpp2 comments data + source const* psource = {}; + parser const* pparser = {}; + + source_position curr_pos = {}; // current (line,col) in output + lineno_t generated_pos_line = {}; // current line in generated output + int last_line_indentation = {}; + int next_comment = 0; // index of the next comment not yet printed + bool last_was_empty = false; + int empty_lines_suppressed = 0; + bool just_printed_line_directive = false; + bool printed_extra = false; + char last_printed_char = {}; + + struct req_act_info { + colno_t requested; + colno_t offset; + + req_act_info(colno_t r, colno_t o) : requested{r}, offset{o} { } + }; + struct { + lineno_t line = {}; + std::vector<req_act_info> requests = {}; + } prev_line_info = {}; + + // Position override information + std::vector<source_position> preempt_pos = {}; // use this position instead of the next supplied one + int pad_for_this_line = 0; // extra padding to add/subtract for this line only + bool ignore_align = false; + int ignore_align_indent = 0; + lineno_t ignore_align_lineno = 0; + bool enable_indent_heuristic = true; + +public: + // Modal information + enum phases { + phase0_type_decls = 0, + phase1_type_defs_func_decls = 1, + phase2_func_defs = 2 + }; + auto get_phase() 
const { return phase; } + +private: + phases phase = phase0_type_decls; + + auto inc_phase() -> void { + switch (phase) { + break;case phase0_type_decls : phase = phase1_type_defs_func_decls; + break;case phase1_type_defs_func_decls: phase = phase2_func_defs; + break;default : assert(!"ICE: invalid lowering phase"); + } + curr_pos = {}; + next_comment = 0; // start over with the comments + } + + std::vector<std::string*> emit_string_targets; // option to emit to string instead of out file + std::vector<std::vector<text_with_pos>*> emit_text_chunks_targets; // similar for vector<text_pos> + + enum class target_type { string, chunks }; + std::vector<target_type> emit_target_stack; // to interleave them sensibly + + + //----------------------------------------------------------------------- + // Print text + // + auto print( + std::string_view s, + source_position pos = source_position{}, + bool track_curr_pos = true, + bool is_known_empty = false + ) + -> void + { + // Take ownership of (and reset) just_printed_line_directive value + auto line_directive_already_done = std::exchange(just_printed_line_directive, false); + + // If the caller is capturing this output, emit to the + // current target instead and skip most positioning logic + if (!emit_target_stack.empty()) + { + // If capturing to a string, emit to the specified string + if (emit_target_stack.back() == target_type::string) { + assert(!emit_string_targets.empty()); + *emit_string_targets.back() += s; + } + + // If capturing to a vector of chunks, emit to that + else { + assert(!emit_text_chunks_targets.empty()); + emit_text_chunks_targets.back()->insert( emit_text_chunks_targets.back()->begin(), text_with_pos(std::string(s), pos) ); + } + + return; + } + + // Otherwise, we'll actually print the string to the output file + // and update our curr_pos position + + if (s.length() > 0) { + last_printed_char = s.back(); + } + + // Reject consecutive empty lines: If this line is empty + if ( + ( s == "\n" || 
is_known_empty ) + && curr_pos.colno <= 1 + ) + { + // And so was the last one, update logical position only + // and increment empty_lines_suppressed instead of printing + if (last_was_empty) { + if (track_curr_pos) { + ++curr_pos.lineno; + curr_pos.colno = 1; + } + ++empty_lines_suppressed; + return; + } + // If this is the first consecutive empty, remember and continue + last_was_empty = true; + } + // Otherwise, if this line is not empty + else { + // Remember that this line was not empty + last_was_empty = false; + + // And if we did suppress any empties, emit a #line to resync + if (empty_lines_suppressed > 0) { + if (!line_directive_already_done) { + print_line_directive(curr_pos.lineno); + } + empty_lines_suppressed = 0; + } + } + + // Output the string + assert (out); + *out << s; + + // Update curr_pos by finding how many line breaks s contained, + // and where the last one was which determines our current colno + if (track_curr_pos) + { + auto last_newline = std::string::npos; // the last newline we found in the string + auto newline_pos = std::size_t(0); // the current newline we found in the string + while ((newline_pos = s.find('\n', newline_pos)) != std::string::npos) + { + // For each line break we find, reset pad and inc current lineno + pad_for_this_line = 0; + ++curr_pos.lineno; + last_newline = newline_pos; + ++newline_pos; + } + + // Now also adjust the colno + if (last_newline != std::string::npos) { + // If we found a newline, it's the distance from the last newline to EOL + curr_pos.colno = s.length() - last_newline; + } + else { + // Else add the length of the string + curr_pos.colno += s.length(); + } + } + } + + + //----------------------------------------------------------------------- + // Internal helpers + + // Start a new line if we're not in col 1 already + // + auto ensure_at_start_of_new_line() + -> void + { + if (curr_pos.colno > 1) { + auto old_pos = curr_pos; + print( "\n" ); + assert(curr_pos.lineno == old_pos.lineno+1); + 
assert(curr_pos.colno == 1); + } + } + + // Print a #line directive + // + auto print_line_directive( lineno_t line ) + -> void + { + // Ignore requests from generated code (negative line numbers) + if (line < 1) { + return; + } + + // Otherwise, implement the request + prev_line_info = { curr_pos.lineno, { } }; + ensure_at_start_of_new_line(); + + // Not using print() here because this is transparent to the curr_pos + if (!flag_clean_cpp1) { + assert (out); + *out << "#line " << line << " " << std::quoted(cpp2_filename) << "\n"; + } + just_printed_line_directive = true; + } + + // Catch up with comment/blank lines + // + auto print_comment(comment const& c) + -> void + { + // For a line comment, start it at the right indentation and print it + // with a newline end + if (c.kind == comment::comment_kind::line_comment) { + print( pad( c.start.colno - curr_pos.colno + 1 ) ); + print( c.text ); + assert( c.text.find("\n") == std::string::npos ); // we shouldn't have newlines + print("\n"); + } + + // For a stream comment, pad out to its column (if we haven't passed it already) + // and emit it there + else { + print( pad( c.start.colno - curr_pos.colno ) ); + print( c.text ); + } + + c.dbg_was_printed = true; + } + + auto flush_comments( source_position pos ) + -> void + { + if (!pcomments) { + return; + } + + // For convenience + auto& comments = *pcomments; + + // Add unprinted comments and blank lines as needed to catch up vertically + // + while (curr_pos.lineno < pos.lineno) + { + // If a comment goes on this line, print it + if ( + next_comment < std::ssize(comments) + && comments[next_comment].start.lineno <= curr_pos.lineno + ) + { + // Emit non-function body comments in phase1_type_defs_func_decls, + // and emit function body comments in phase2_func_defs + assert(pparser); + if ( + ( + phase == phase1_type_defs_func_decls + && !pparser->is_within_function_body( comments[next_comment].start.lineno ) + ) + || + ( + phase == phase2_func_defs + && 
pparser->is_within_function_body( comments[next_comment].start.lineno ) + ) + ) + { + print_comment( comments[next_comment] ); + assert(curr_pos.lineno <= pos.lineno); // we shouldn't have overshot + } + + ++next_comment; + } + + // Otherwise, just print a blank line + else { + print("\n"); + } + } + } + + auto print_unprinted_comments() + { + for (auto const& c : *pcomments) { + if (!c.dbg_was_printed) { + print_comment(c); + } + } + } + + // Position ourselves as close to pos as possible, + // and catch up with displaying comments + // + auto align_to( source_position pos ) + -> void + { + auto on_same_line = curr_pos.lineno == pos.lineno; + + // Ignoring this logic is used when we're generating new code sections, + // such as return value structs, and emitting raw string literals + if (ignore_align) { + print( pad( ignore_align_indent - curr_pos.colno ) ); + return; + } + + // Otherwise, we need to apply our usual alignment logic + + // Catch up with displaying comments + flush_comments( pos ); + + // If we're not on the right line + if ( + printed_extra + && !on_same_line + ) + { + print_line_directive(pos.lineno); + curr_pos.lineno = pos.lineno; + printed_extra = false; + } + else if (curr_pos.lineno < pos.lineno) + { + // In case we're just one away, try a blank line + // (this might get ignored and we'll get the line directive) + print( "\n" ); + if (curr_pos.lineno != pos.lineno) { + print_line_directive(pos.lineno); + } + curr_pos.lineno = pos.lineno; + } + + // Finally, align to the target column, if we're on the right line + // and not one-past-the-end on the extra line at section end) + assert( + psource + && 0 <= curr_pos.lineno + && curr_pos.lineno < std::ssize(psource->get_lines())+1 + ); + if ( + curr_pos.lineno == pos.lineno + && curr_pos.lineno < std::ssize(psource->get_lines()) + ) + { + // Record this line's indentation as the 'last' line for next time + last_line_indentation = psource->get_lines()[curr_pos.lineno].indent(); + + // If this line 
was originally densely spaced (had <2 whitespace + // between all tokens), then the programmer likely wasn't doing a lot + // of special formatting... + if (psource->get_lines()[curr_pos.lineno].all_tokens_are_densely_spaced) + { + // For the first token in a line, use the line's original indentation + if (curr_pos.colno <= 1) + { + print( pad( psource->get_lines()[curr_pos.lineno].indent() ) ); + } + // For later tokens, don't try to add padding + else { + if ( + last_printed_char == ';' + && on_same_line + ) + { + print( " " ); + } + } + } + // Otherwise, make a best effort to adjust position with some padding + else + { + pos.colno = std::max( 1, pos.colno + pad_for_this_line ); + print( pad( pos.colno - curr_pos.colno ) ); + } + } + } + + +public: + //----------------------------------------------------------------------- + // Finalize phase + // + auto finalize_phase(bool print_remaining_comments = false) + { + if ( + is_open() + && psource + && psource->has_cpp2() + ) + { + flush_comments( {curr_pos.lineno+1, 1} ); + + if (print_remaining_comments) { + print_unprinted_comments(); + } + + // Always make sure the very last line ends with a newline + // (not really necessary but makes some tools quieter) + // -- but only if there's any Cpp2, otherwise don't + // because passing through all-Cpp1 code should always + // remain diff-identical + if (phase == phase2_func_defs) { + print_extra("\n"); + } + } + } + + + //----------------------------------------------------------------------- + // Open + // + auto open( + std::string cpp2_filename_, + std::string cpp1_filename_, + std::vector<comment> const& comments, + cpp2::source const& source, + cpp2::parser const& parser + ) + -> void + { + cpp2_filename = cpp2_filename_; + assert( + !is_open() + && !pcomments + && "ICE: tried to call .open twice" + ); + cpp1_filename = cpp1_filename_; + if (cpp1_filename == "stdout") { + out = &std::cout; + } + else { + out_file.open(cpp1_filename); + out = &out_file; + } + 
pcomments = &comments; + psource = &source; + pparser = &parser; + } + + auto reopen() + -> void + { + assert( + is_open() + && "ICE: tried to call .reopen without first calling .open" + ); + assert(cpp1_filename.ends_with(".h")); + out_file.close(); + out_file.open(cpp1_filename + "pp"); + } + + auto is_open() + -> bool + { + if (out) { + assert( + pcomments + && "ICE: if is_open, pcomments should also be set" + ); + } + return out; + } + + + //----------------------------------------------------------------------- + // Abandon: close and delete + // + auto abandon() + -> void + { + if (!is_open()) { + return; + } + if (out_file.is_open()) { + out_file.close(); + std::remove(cpp1_filename.c_str()); + } + } + + + //----------------------------------------------------------------------- + // Print extra text and don't track positions + // Used for Cpp2 boundary comment and prelude and final newline + // + auto print_extra( std::string_view s ) + -> void + { + assert( + is_open() + && "ICE: printer must be open before printing" + ); + print( s, source_position{}, false ); + printed_extra = true; + } + + + //----------------------------------------------------------------------- + // Print a Cpp1 line, which should be at lineno + // + auto print_cpp1( std::string_view s, lineno_t line ) + -> void + { + assert( + is_open() + && line >= 0 + && "ICE: printer must be open before printing, and line number must not be negative (Cpp1 code is never generated)" + ); + + // Always start a Cpp1 line on its own new line + ensure_at_start_of_new_line(); + + // If we are out of sync with the current logical line number, + // emit a #line directive to re-sync + if (curr_pos.lineno != line) { + print_line_directive( line ); + curr_pos.lineno = line; + } + + // Print the line + assert (curr_pos.colno == 1); + print( s ); + print( "\n" ); + } + + + //----------------------------------------------------------------------- + // Used when we start a new Cpp2 section, or when we emit the 
same item + // more than once (notably when we emit operator= more than once) + // + auto reset_line_to(lineno_t line, bool force = false) + -> void + { + // Always start a Cpp2 section on its own new line + ensure_at_start_of_new_line(); + + // If we are out of sync with the current logical line number, + // emit a #line directive to re-sync + if ( + force + || curr_pos.lineno != line + ) + { + print_line_directive( line ); + curr_pos.lineno = line; + } + + assert (curr_pos.colno == 1); + } + + + //----------------------------------------------------------------------- + // Print a Cpp2 item, which should be at pos + // + auto print_cpp2( + std::string_view s, + source_position pos, + bool leave_newlines_alone = false, + bool is_known_empty = false + + ) + -> void + { + // If we're printing for real (not to a string target) + if (emit_target_stack.empty()) + { + // If we're in a generated text region (signified by negative + // line numbers), then shunt this call to print_extra instead + if (pos.lineno < 1) { + if (generated_pos_line != pos.lineno) { + *out << "\n" + std::string(last_line_indentation, ' '); + generated_pos_line = pos.lineno; + } + print_extra(s); + return; + } + + // Otherwise, we're no longer in generated code, so reset the + // generated code counter + generated_pos_line = {}; + } + + assert( + is_open() + && "ICE: printer must be open before printing" + ); + + // If there are any embedded newlines, split this string into + // separate print_cpp2 calls + if (auto newline_pos = s.find('\n'); + !leave_newlines_alone + && s.length() > 1 + && newline_pos != s.npos + ) + { + while (newline_pos != std::string_view::npos) + { + // Print the text before the next newline + if (newline_pos > 0) { + print_cpp2( s.substr(0, newline_pos), pos ); + } + + // Emit the newline as a positioned empty string + assert (s[newline_pos] == '\n'); + ++pos.lineno; + pos.colno = 1; + print_cpp2( "", pos, false, curr_pos.colno <= 1 ); + + s.remove_prefix( newline_pos+1 ); 
+ newline_pos = s.find('\n'); + } + // Print any tail following the last newline + if (!s.empty()) { + print_cpp2( s, pos ); + } + return; + } + + // The rest of this call handles a single chunk that's either a + // standalone "\n" or a piece of text that doesn't have a newline + + // Skip alignment work if we're capturing emitted text + if (emit_target_stack.empty()) + { + // Remember where we are + auto last_pos = curr_pos; + + // We may want to adjust the position based on (1) a position preemption request + // or else (2) to repeat a similar adjustment we discovered on the previous line + auto adjusted_pos = pos; + + // (1) See if there's a position preemption request, if so use it up + // For now, the preempt position use cases are about overriding colno + // and only on the same line. In the future, we might have more use cases. + if (!preempt_pos.empty()) { + if (preempt_pos.back().lineno == pos.lineno) { + adjusted_pos.colno = preempt_pos.back().colno; + } + } + + // (2) Otherwise, see if there's a previous line's offset to repeat + // If we moved to a new line, then this is the first + // non-comment non-whitespace text on the new line + else if ( + last_pos.lineno == pos.lineno-1 + && enable_indent_heuristic + ) + { + // If the last line had a request for this colno, remember its actual offset + constexpr int sentinel = -100; + auto last_line_offset = sentinel; + for(auto i = 0; + i < std::ssize(prev_line_info.requests) + && prev_line_info.requests[i].requested <= pos.colno; + ++i + ) + { + if (prev_line_info.requests[i].requested == pos.colno) + { + last_line_offset = prev_line_info.requests[i].offset; + break; + } + } + + // If there was one, apply the actual column number offset + if (last_line_offset > sentinel) { + adjusted_pos.colno += last_line_offset; + } + } + enable_indent_heuristic = true; + + // If we're changing lines, start accumulating this new line's request/actual adjustment info + if (last_pos.lineno < adjusted_pos.lineno) { + 
prev_line_info = { curr_pos.lineno, { } }; + } + + align_to(adjusted_pos); + + // Remember the requested and actual offset columns for this item + prev_line_info.requests.push_back( req_act_info( pos.colno /*requested*/ , curr_pos.colno /*actual*/ - pos.colno ) ); + } + + print(s, pos, true, is_known_empty ); + } + + + //----------------------------------------------------------------------- + // Position override control functions + // + + // Use this position instead of the next supplied one + // Useful when Cpp1 syntax is emitted in a different order/verbosity + // than Cpp2 such as with declarations + // + auto preempt_position_push(source_position pos) + -> void + { + preempt_pos.push_back( pos ); + } + + auto preempt_position_pop() + -> void + { + assert(!preempt_pos.empty()); + preempt_pos.pop_back(); + } + + // Add (or, if negative, subtract) padding for the current line only + // + auto add_pad_in_this_line(colno_t extra) + -> void + { + pad_for_this_line += extra; + } + + // Enable indent heuristic for just this line + // + auto disable_indent_heuristic_for_next_text() + -> void + { + enable_indent_heuristic = false; + } + + // Ignore position information, usually when emitting generated code + // such as generated multi-return type structs + // + auto ignore_alignment( + bool ignore, + int indent = 0 + ) + -> void + { + // We'll only ever call this in local non-nested true/false pairs. + // If we ever want to generalize (support nesting, or make it non-brittle), + // wrap this in a push/pop stack. 
+ if (ignore) { + ignore_align = true; + ignore_align_indent = indent; + ignore_align_lineno = curr_pos.lineno; // push state + } + else { + ignore_align = false; + ignore_align_indent = 0; + curr_pos.lineno = ignore_align_lineno; // pop state + } + } + + + //----------------------------------------------------------------------- + // Modal state control functions + // + + auto next_phase() + -> void + { + inc_phase(); + } + + // Provide an option to store to a given string instead, which is + // useful for capturing Cpp1-formatted output for generated code + // + auto emit_to_string( std::string* target = {} ) + -> void + { + if (target) { + emit_string_targets.push_back( target ); + emit_target_stack.push_back(target_type::string); + } + else { + emit_string_targets.pop_back(); + emit_target_stack.pop_back(); + } + } + + // Provide an option to store to a vector<text_with_pos>, which is + // useful for postfix expression which have to mix unwrapping operators + // with emitting sub-elements such as expression lists + // + auto emit_to_text_chunks( std::vector<text_with_pos>* target = {} ) + -> void + { + if (target) { + emit_text_chunks_targets.push_back( target ); + emit_target_stack.push_back(target_type::chunks); + } + else { + emit_text_chunks_targets.pop_back(); + emit_target_stack.pop_back(); + } + } + +}; + + +//----------------------------------------------------------------------- +// +// cppfront: a compiler instance +// +//----------------------------------------------------------------------- +// +struct function_prolog { + std::vector<std::string> mem_inits = {}; + std::vector<std::string> statements = {}; +}; + +class cppfront +{ + std::string sourcefile; + std::vector<error_entry> errors; + + // For building + // + cpp2::source source; + cpp2::tokens tokens; + cpp2::parser parser; + cpp2::sema sema; + + bool source_loaded = true; + bool last_postfix_expr_was_pointer = false; + bool violates_bounds_safety = false; + bool 
violates_initialization_safety = false; + bool suppress_move_from_last_use = false; + + declaration_node const* having_signature_emitted = {}; + + declaration_node const* generating_assignment_from = {}; + declaration_node const* generating_move_from = {}; + declaration_node const* generating_postfix_inc_dec_from = {}; + bool emitting_that_function = false; + bool emitting_move_that_function = false; + std::vector<token const*> already_moved_that_members = {}; + + struct arg_info { + passing_style pass = passing_style::in; + token const* ptoken = {}; + }; + std::vector<arg_info> current_args = { {} }; + + struct active_using_declaration { + token const* identifier = {}; + + explicit active_using_declaration(using_statement_node const& n) { + if (auto id = get_if<id_expression_node::qualified>(&n.id->id)) { + identifier = (*id)->ids.back().id->identifier; + } + } + }; + + using source_order_name_lookup_res = + std::optional<std::variant<declaration_node const*, active_using_declaration>>; + + // Stack of the currently active nested declarations we're inside + std::vector<declaration_node const*> current_declarations = { {} }; + + // Stack of the currently active names for source order name lookup: + // Like 'current_declarations' + also parameters and using declarations + std::vector<source_order_name_lookup_res::value_type> current_names = { {} }; + + // Maintain a stack of the functions we're currently processing, which can + // be up to MaxNestedFunctions in progress (if we run out, bump the Max). 
+ // The main reason for this is to be able to pass function_info's, especially + // their .epilog, by reference for performance while still having lifetime safety + struct function_info + { + declaration_node const* decl = {}; + function_type_node const* func = {}; + declaration_node::declared_value_set_funcs declared_value_set_functions = {}; + function_prolog prolog = {}; + std::vector<std::string> epilog = {}; + + function_info( + declaration_node const* decl_, + function_type_node const* func_, + declaration_node::declared_value_set_funcs declared_value_set_functions_ + ) + : decl{decl_} + , func{func_} + , declared_value_set_functions{declared_value_set_functions_} + { } + }; + class current_functions_ + { + std::deque<function_info> list = { {} }; + public: + auto push( + declaration_node const* decl, + function_type_node const* func, + declaration_node::declared_value_set_funcs thats + ) { + list.emplace_back(decl, func, thats); + } + + auto pop() { + list.pop_back(); + } + + auto back() -> function_info& { + assert(!empty()); + return list.back(); + } + + auto empty() -> bool { + return list.empty(); + } + }; + current_functions_ current_functions; + + // For lowering + // + positional_printer printer; + bool in_definite_init = false; + bool in_parameter_list = false; + + std::string function_return_name; + struct function_return { + parameter_declaration_list_node* param_list; + passing_style pass; + bool is_deduced; + + function_return( + parameter_declaration_list_node* param_list_, + passing_style pass_ = passing_style::invalid, + bool is_deduced_ = false + ) + : param_list{param_list_} + , pass{pass_} + , is_deduced{is_deduced_} + { } + }; + std::vector<function_return> function_returns; + parameter_declaration_list_node single_anon; + // special value - hack for now to note single-anon-return type kind in this function_returns working list + std::vector<std::string> function_requires_conditions; + + struct iter_info { + iteration_statement_node 
const* stmt; + bool used = false; + }; + std::vector<iter_info> iteration_statements; + + std::vector<bool> in_non_rvalue_context = { false }; + std::vector<bool> need_expression_list_parens = { true }; + auto push_need_expression_list_parens( bool b ) -> void { need_expression_list_parens.push_back(b); } + auto pop_need_expression_list_parens() -> void { assert(std::ssize(need_expression_list_parens) > 1); + need_expression_list_parens.pop_back(); } + auto should_add_expression_list_parens() -> bool { assert(!need_expression_list_parens.empty()); + return need_expression_list_parens.back(); } + auto consumed_expression_list_parens() -> void { if( std::ssize(need_expression_list_parens) > 1 ) + need_expression_list_parens.back() = false; } + +public: + //----------------------------------------------------------------------- + // Constructor + // + // filename the source file to be processed + // + cppfront(std::string const& filename) + : sourcefile{ filename } + , source { errors } + , tokens { errors } + , parser { errors } + , sema { errors } + { + // "Constraints enable creativity in the right directions" + // sort of applies here + // + if ( + !sourcefile.ends_with(".cpp2") + && !sourcefile.ends_with(".h2") + ) + { + errors.emplace_back( + source_position(-1, -1), + "source filename must end with .cpp2 or .h2: " + sourcefile + ); + } + + // Load the program file into memory + // + else if (!source.load(sourcefile)) + { + if (errors.empty()) { + errors.emplace_back( + source_position(-1, -1), + "file not found: " + sourcefile + ); + } + source_loaded = false; + } + + else + { + // Tokenize + // + tokens.lex(source.get_lines()); + + // Parse + // + try + { + for (auto const& [line, entry] : tokens.get_map()) { + if (!parser.parse(entry, tokens.get_generated())) { + errors.emplace_back( + source_position(line, 0), + "parse failed for section starting here", + false, + true // a noisy fallback error message + ); + } + } + + // Sema + parser.visit(sema); + if 
(!sema.apply_local_rules()) { + violates_initialization_safety = true; + } + } + catch (std::runtime_error& e) { + errors.emplace_back( + source_position(-1, -1), + e.what() + ); + } + } + } + + + //----------------------------------------------------------------------- + // lower_to_cpp1 + // + // Emits the target file with the last '2' stripped + // + struct lower_to_cpp1_ret { + lineno_t cpp1_lines = 0; + lineno_t cpp2_lines = 0; + }; + auto lower_to_cpp1() + -> lower_to_cpp1_ret + { + auto ret = lower_to_cpp1_ret{}; + + // Only lower to Cpp1 if we haven't already encountered errors + if (!errors.empty()) { + return {}; + } + + // Now we'll open the Cpp1 file + auto cpp1_filename = sourcefile.substr(0, std::ssize(sourcefile) - 1); + if (!flag_cpp1_filename.empty()) { + cpp1_filename = flag_cpp1_filename; // use override if present + } + + printer.open( + sourcefile, + cpp1_filename, + tokens.get_comments(), + source, + parser + ); + if (!printer.is_open()) { + errors.emplace_back( + source_position{}, + "could not open output file " + cpp1_filename + ); + return {}; + } + + // Generate a reasonable macroized name + auto cpp1_FILENAME = to_upper_and_underbar(cpp1_filename); + + + //--------------------------------------------------------------------- + // Do lowered file prolog + // + // Only emit extra lines if we actually have Cpp2, because + // we want Cpp1-only files to pass through with zero changes + // (unless the user requested import/include of std) + if ( + source.has_cpp2() + || flag_import_std + || flag_include_std + ) + { + if (flag_emit_cppfront_info) { + printer.print_extra( + "\n// Generated by cppfront " + #include "version.info" + " build " + #include "build.info" + ); + } + printer.print_extra( "\n" ); + if (cpp1_filename.back() == 'h') { + printer.print_extra( "#ifndef " + cpp1_FILENAME+"_CPP2\n"); + printer.print_extra( "#define " + cpp1_FILENAME+"_CPP2" + "\n\n" ); + } + + if (flag_use_source_location) { + printer.print_extra( "#define 
CPP2_USE_SOURCE_LOCATION Yes\n" ); + } + + if (flag_include_std) { + printer.print_extra( "#define CPP2_INCLUDE_STD Yes\n" ); + } + else if (flag_import_std) { + printer.print_extra( "#define CPP2_IMPORT_STD Yes\n" ); + } + + if (flag_no_exceptions) { + printer.print_extra( "#define CPP2_NO_EXCEPTIONS Yes\n" ); + } + + if (flag_no_rtti) { + printer.print_extra( "#define CPP2_NO_RTTI Yes\n" ); + } + } + + auto map_iter = tokens.get_map().cbegin(); + auto hpp_includes = std::string{}; + + + //--------------------------------------------------------------------- + // Do phase0_type_decls + assert(printer.get_phase() == printer.phase0_type_decls); + + if ( + source.has_cpp2() + && !flag_clean_cpp1 + ) + { + printer.print_extra( "\n//=== Cpp2 type declarations ====================================================\n\n" ); + } + + if ( + !tokens.get_map().empty() + || flag_import_std + || flag_include_std + ) + { + printer.print_extra( "\n#include \"cpp2util.h\"\n\n" ); + } + + if ( + source.has_cpp2() + && !flag_clean_cpp1 + ) + { + printer.reset_line_to(1, true); + } + + for (auto& section : tokens.get_map()) + { + assert (!section.second.empty()); + + // Get the parse tree for this section and emit each forward declaration + auto decls = parser.get_parse_tree_declarations_in_range(section.second); + for (auto& decl : decls) { + assert(decl); + emit(*decl); + } + } + + + //--------------------------------------------------------------------- + // Do phase1_type_defs_func_decls + // + printer.finalize_phase(); + printer.next_phase(); + + if ( + source.has_cpp2() + && !flag_clean_cpp1 + ) + { + printer.print_extra( "\n//=== Cpp2 type definitions and function declarations ===========================\n\n" ); + printer.reset_line_to(1, true); + } + + assert (printer.get_phase() == positional_printer::phase1_type_defs_func_decls); + for ( + lineno_t curr_lineno = 0; + auto const& line : source.get_lines() + ) + { + // Skip dummy line we added to make 0-vs-1-based offsets 
readable + if (curr_lineno != 0) + { + // If it's a Cpp1 line, emit it + if (line.cat != source_line::category::cpp2) + { + if ( + source.has_cpp2() + && line.cat != source_line::category::preprocessor + ) + { + ++ret.cpp2_lines; + } + else + { + ++ret.cpp1_lines; + } + + if ( + flag_cpp2_only + && !line.text.empty() + && line.cat != source_line::category::comment + && line.cat != source_line::category::import + ) + { + if (line.cat == source_line::category::preprocessor) { + if (!line.text.ends_with(".h2\"")) { + errors.emplace_back( + source_position(curr_lineno, 1), + "pure-cpp2 switch disables the preprocessor, including #include (except of .h2 files) - use import instead (note: 'import std;' is implicit in -pure-cpp2)" + ); + return {}; + } + } + else { + errors.emplace_back( + source_position(curr_lineno, 1), + "pure-cpp2 switch disables Cpp1 syntax" + ); + return {}; + } + } + + if ( + line.cat == source_line::category::preprocessor + && line.text.ends_with(".h2\"") + ) + { + // Strip off the 2" + auto h_include = line.text.substr(0, line.text.size()-2); + printer.print_cpp1( h_include + "\"", curr_lineno ); + hpp_includes += h_include + "pp\"\n"; + } + else { + printer.print_cpp1( line.text, curr_lineno ); + } + } + + // If it's a Cpp2 line... 
+ else { + ++ret.cpp2_lines; + + // We should be in a position to emit a set of Cpp2 declarations + if ( + map_iter != tokens.get_map().cend() + && map_iter->first /*line*/ <= curr_lineno + ) + { + // We should be here only when we're at exactly the first line of a Cpp2 section + assert (map_iter->first == curr_lineno); + assert (!map_iter->second.empty()); + + // Get the parse tree for this section and emit each forward declaration + auto decls = parser.get_parse_tree_declarations_in_range(map_iter->second); + for (auto& decl : decls) { + assert(decl); + emit(*decl); + } + ++map_iter; + } + } + } + ++curr_lineno; + } + + // We can stop here if there's no Cpp2 code -- a file with no Cpp2 + // should have perfect passthrough verifiable with diff, including + // that we didn't misidentify anything as Cpp2 (even in the + // presence of nonstandard vendor extensions) + // + if (!source.has_cpp2()) { + assert(ret.cpp2_lines == 0); + return ret; + } + + // If there is Cpp2 code, we have more to do... + + // First, if this is a .h2 and in a -pure-cpp2 compilation, + // we need to switch filenames + if ( + cpp1_filename.back() == 'h' + && flag_cpp2_only + ) + { + printer.print_extra( "\n#endif\n" ); + + printer.reopen(); + if (!printer.is_open()) { + errors.emplace_back( + source_position{}, + "could not open second output file " + cpp1_filename + ); + return {}; + } + + printer.print_extra( "\n#ifndef " + cpp1_FILENAME+"_CPP2" ); + printer.print_extra( "\n#error This file is part of a '.h2' header compiled to be consumed from another -pure-cpp2 file. To use this file, write '#include \"" + cpp1_filename + "2\"' in a '.h2' or '.cpp2' file compiled with -pure-cpp2." 
); + printer.print_extra( "\n#endif\n" ); + + cpp1_FILENAME += "PP"; + printer.print_extra( "\n#ifndef " + cpp1_FILENAME+"_CPP2" ); + printer.print_extra( "\n#define " + cpp1_FILENAME+"_CPP2" + "\n\n" ); + + printer.print_extra( hpp_includes ); + } + + + //--------------------------------------------------------------------- + // Do phase2_func_defs + // + printer.finalize_phase(); + printer.next_phase(); + + if ( + source.has_cpp2() + && !flag_clean_cpp1 + ) + { + printer.print_extra( "\n//=== Cpp2 function definitions =================================================\n\n" ); + printer.reset_line_to(1, true); + } + + for (auto& section : tokens.get_map()) + { + assert (!section.second.empty()); + + // Get the parse tree for this section and emit each forward declaration + auto decls = parser.get_parse_tree_declarations_in_range(section.second); + for (auto& decl : decls) { + assert(decl); + emit(*decl); + } + } + + if (cpp1_filename.back() == 'h') { + printer.print_extra( "\n#endif" ); + } + + printer.finalize_phase( true ); + + // Finally, some debug checks + assert( + (!errors.empty() || tokens.num_unprinted_comments() == 0) + && "ICE: not all comments were printed" + ); + + return ret; + } + + + //----------------------------------------------------------------------- + // + // emit() functions - each emits a kind of node + // + // The body often mirrors the node's visit() function, unless customization + // is needed where Cpp1 and Cpp2 have different grammar orders + // + + void print_to_string( + std::string* str, + auto& i, + auto... more + ) + { + // Quick special-purpose state preservation... 
this tactical hack + // is fine for now, but if needed more then generalize this + auto state1 = need_expression_list_parens; + auto state2 = already_moved_that_members; + + printer.emit_to_string(str); + emit(i, more...); + printer.emit_to_string(); + + // Restore state + need_expression_list_parens.swap(state1); + already_moved_that_members .swap(state2); + }; + + auto print_to_string( + auto& i, + auto... more + ) + -> std::string + { + auto print = std::string{}; + print_to_string(&print, i, more...); + return print; + }; + + //----------------------------------------------------------------------- + // try_emit + // + // Helper to emit whatever is in a variant where each + // alternative is a smart pointer + // + template <int I> + auto try_emit( + auto& v, + auto&&... more + ) + -> void + { + if (v.index() == I) { + auto const& alt = std::get<I>(v); + assert (alt); + emit (*alt, CPP2_FORWARD(more)...); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + token const& n, + bool is_qualified = false, + source_position pos = {} + ) + -> void + { STACKINSTR + if (pos == source_position{}) { + pos = n.position(); + } + + // Implicit "cpp2::" qualification of Cpp2 fixed-width type aliases + // and cpp2::finally + if ( + !is_qualified + && ( + n.type() == lexeme::Cpp2FixedType + || n == "finally" + ) + ) + { + printer.print_cpp2("cpp2::", pos); + } + + // 'this' is not a pointer + if (n == "this") { + printer.print_cpp2("(*this)", pos); + } + // Reclaim the alternative names and some keywords for users + else if ( + n == "and" + || n == "and_eq" + || n == "bitand" + || n == "bitor" + || n == "compl" + || n == "not" + || n == "not_eq" + || n == "or" + || n == "or_eq" + || n == "xor" + || n == "xor_eq" + || n == "new" + || n == "class" + || n == "struct" + || n == "enum" + || n == "union" + ) + { + printer.print_cpp2("cpp2_"+n.to_string(), pos); + } + else { + printer.print_cpp2(n, pos, true); + } + + 
in_definite_init = is_definite_initialization(&n); + } + + + //----------------------------------------------------------------------- + // + auto emit( + literal_node const& n, + source_position pos = {} + ) + -> void + { STACKINSTR + if (pos == source_position{}) { + pos = n.position(); + } + + assert(n.literal); + emit(*n.literal); + if (n.user_defined_suffix) { + emit(*n.user_defined_suffix); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + unqualified_id_node const& n, + bool in_synthesized_multi_return = false, + bool is_local_name = true, + bool is_qualified = false + ) + -> void + { STACKINSTR + auto last_use = is_definite_last_use(n.identifier); + + bool add_forward = + last_use + && last_use->is_forward + && !in_non_rvalue_context.back(); + + bool add_move = + !add_forward + && ( + in_synthesized_multi_return + || (last_use && !suppress_move_from_last_use) + ) + && !in_non_rvalue_context.back(); + + if ( + add_move + && *(n.identifier - 1) == "return" + && *(n.identifier + 1) == ";" + ) + { + add_move = false; + } + + if ( + emitting_move_that_function + && *n.identifier == "that" + ) + { + add_move = true; + } + + // For an explicit 'forward' apply forwarding to correct identifier + assert (!current_args.empty()); + if (current_args.back().pass == passing_style::forward) { + add_forward = current_args.back().ptoken == n.identifier; + } + + if (add_move) { + printer.print_cpp2("std::move(", n.position()); + } + if (add_forward) { + printer.print_cpp2("CPP2_FORWARD(", {n.position().lineno, n.position().colno - 8}); + } + + assert(n.identifier); + emit(*n.identifier, is_qualified); // inform the identifier if we know this is qualified + + if (n.open_angle != source_position{}) { + printer.print_cpp2("<", n.open_angle); + auto first = true; + for (auto& a : n.template_args) { + if (!first) { + printer.print_cpp2(",", a.comma); + } + first = false; + try_emit<template_argument::expression>(a.arg); + 
try_emit<template_argument::type_id >(a.arg); + } + printer.print_cpp2(">", n.close_angle); + } + + in_definite_init = is_definite_initialization(n.identifier); + if ( + !in_definite_init + && !in_parameter_list + ) + { + if (auto decl = sema.get_declaration_of(*n.identifier); + is_local_name + && !(*n.identifier == "this") + && !(*n.identifier == "that") + && decl + && ( + in_synthesized_multi_return + // note pointer equality: if we're not in the actual declaration of n.identifier + || decl->identifier != n.identifier + ) + // and this variable was uninitialized + && !decl->initializer + // and it's either a non-parameter or an out parameter + && ( + !decl->parameter + || ( + decl->parameter + && decl->parameter->pass == passing_style::out + ) + ) + ) + { + printer.print_cpp2(".value()", n.position()); + } + } + else if (in_synthesized_multi_return) { + printer.print_cpp2(".value()", n.position()); + } + + if ( + add_move + || add_forward + ) + { + printer.print_cpp2(")", n.position()); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + qualified_id_node const& n, + bool include_unqualified_id = true + ) + -> void + { STACKINSTR + if (!sema.check(n)) { + return; + } + + // Implicit "cpp2::" qualification of "unique.new" and "shared.new" + if ( + n.ids.size() == 2 + && ( + *n.ids[0].id->identifier == "unique" + || *n.ids[0].id->identifier == "shared" + ) + && *n.ids[1].scope_op == "." 
+ && *n.ids[1].id->identifier == "new" + ) + { + printer.print_cpp2("cpp2::", n.position()); + } + + auto ident = std::string{}; + printer.emit_to_string(&ident); + + for (auto const& id : std::span{n.ids}.first(n.ids.size() - !include_unqualified_id)) + { + if (id.scope_op) { + emit(*id.scope_op); + } + emit(*id.id, false, true, true); // inform the unqualified-id that it's qualified + } + + printer.emit_to_string(); + printer.print_cpp2( ident, n.position() ); + } + + + //----------------------------------------------------------------------- + // + auto emit( + type_id_node const& n, + source_position pos = {} + ) + -> void + { STACKINSTR + if (pos == source_position{}) { + pos = n.position(); + } + + if (n.is_wildcard()) { + printer.print_cpp2("auto", pos); + } + else { + try_emit<type_id_node::unqualified>(n.id, false, false); + try_emit<type_id_node::qualified >(n.id); + try_emit<type_id_node::keyword >(n.id); + } + + for (auto i = n.pc_qualifiers.rbegin(); i != n.pc_qualifiers.rend(); ++i) { + if ((**i) == "const") { printer.print_cpp2(" ", pos); } + emit(**i, false, pos); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + id_expression_node const& n, + bool is_local_name = true + ) + -> void + { STACKINSTR + try_emit<id_expression_node::qualified >(n.id); + try_emit<id_expression_node::unqualified>(n.id, false, is_local_name); + } + + + auto emit_prolog_mem_inits( + function_prolog const& prolog, + colno_t indent + ) + -> void + { STACKINSTR + for (auto& line : prolog.mem_inits) { + printer.print_extra("\n"); + printer.print_extra(pad(indent-1)); + printer.print_extra(line); + } + } + + auto emit_prolog_statements( + function_prolog const& prolog, + colno_t indent + ) + -> void + { STACKINSTR + for (auto& line : prolog.statements) { + printer.print_extra("\n"); + printer.print_extra(pad(indent-1)); + printer.print_extra(line); + } + } + + auto emit_epilog_statements( + std::vector<std::string> const& 
epilog, + colno_t indent + ) + -> void + { STACKINSTR + for (auto& line : epilog) { + printer.print_extra("\n"); + printer.print_extra(pad(indent-1)); + printer.print_extra(line); + } + } + + //----------------------------------------------------------------------- + // + auto emit( + compound_statement_node const& n, + function_prolog const& function_prolog = {}, + std::vector<std::string> const& function_epilog = {} + ) + -> void + { STACKINSTR + emit_prolog_mem_inits(function_prolog, n.body_indent+1); + + printer.print_cpp2( "{", n.open_brace ); + + emit_prolog_statements(function_prolog, n.body_indent+1); + + for (auto const& x : n.statements) { + assert(x); + emit(*x); + } + + emit_epilog_statements( function_epilog, n.body_indent+1); + + printer.print_cpp2( "}", n.close_brace ); + } + + + //----------------------------------------------------------------------- + // + auto emit( + inspect_expression_node const& n, + bool is_expression + ) + -> void + { STACKINSTR + auto constexpr_qualifier = std::string{}; + if (n.is_constexpr) { + constexpr_qualifier = "constexpr "; + } + + // If this is an expression, it will have an explicit result type, + // and we need to start the lambda that we'll immediately invoke + auto result_type = std::string{}; + if (is_expression) { + assert(n.result_type); + printer.emit_to_string(&result_type); + emit(*n.result_type); + printer.emit_to_string(); + printer.print_cpp2("[&] () -> " + result_type + " ", n.position()); + } + printer.print_cpp2("{ " + constexpr_qualifier + "auto&& _expr = ", n.position()); + + assert(n.expression); + emit(*n.expression); + printer.print_cpp2(";", n.position()); + + assert( + n.identifier + && *n.identifier == "inspect" + ); + + assert(!n.alternatives.empty()); + auto found_wildcard = false; + + for (auto first = true; auto&& alt : n.alternatives) + { + assert(alt && alt->is_as_keyword); + if (!first) { + printer.print_cpp2("else ", alt->position()); + } + first = false; + + auto id = std::string{}; 
+ printer.emit_to_string(&id); + + if (alt->type_id) { + emit(*alt->type_id); + } + else { + assert(alt->value); + emit(*alt->value); + } + printer.emit_to_string(); + + assert ( + *alt->is_as_keyword == "is" + || *alt->is_as_keyword == "as" + ); + // TODO: pick up 'as' next, for now just do 'is' + + if (*alt->is_as_keyword == "is") + { + // Stringize the expression-statement now... + auto statement = std::string{}; + printer.emit_to_string(&statement); + emit(*alt->statement); + printer.emit_to_string(); + // ... and jettison the final ; for an expression-statement + while ( + !statement.empty() + && ( + statement.back() == ';' + || isspace(statement.back()) + ) + ) + { + statement.pop_back(); + } + + replace_all( statement, "cpp2::as_<", "cpp2::as<" ); + + // If this is an inspect-expression, we'll have to wrap each alternative + // in an 'if constexpr' so that its type is ignored for mismatches with + // the inspect-expression's type + auto return_prefix = std::string{}; + auto return_suffix = std::string{";"}; // use this to tack the ; back on in the alternative body + if (is_expression) { + return_prefix = "{ if constexpr( requires{" + statement + ";} ) if constexpr( std::is_convertible_v<CPP2_TYPEOF((" + statement + "))," + result_type + "> ) return "; + return_suffix += " }"; + } + + if (id == "auto") { + found_wildcard = true; + if (is_expression) { + printer.print_cpp2("return ", alt->position()); + } + } + else { + printer.print_cpp2("if " + constexpr_qualifier, alt->position()); + if (alt->type_id) { + printer.print_cpp2("(cpp2::is<" + id + ">(_expr)) ", alt->position()); + } + else { + assert (alt->value); + printer.print_cpp2("(cpp2::is(_expr, " + id + ")) ", alt->position()); + } + printer.print_cpp2(return_prefix, alt->position()); + } + + printer.print_cpp2(statement, alt->position()); + + if ( + is_expression + && id != "auto" + ) + { + assert(alt->statement->is_expression()); + printer.print_cpp2("; else return " + result_type + "{}", 
alt->position()); + printer.print_cpp2("; else return " + result_type + "{}", alt->position()); + } + + printer.print_cpp2(return_suffix, alt->position()); + } + else { + errors.emplace_back( + alt->position(), + "(temporary alpha limitation) cppfront is still learning 'inspect' - only simple 'is' alternatives are currently supported" + ); + return; + } + } + + if (is_expression) { + if (!found_wildcard) { + errors.emplace_back( + n.position(), + "an inspect expression must have an `is _` match-anything wildcard alternative" + ); + return; + } + } + else { + printer.print_cpp2("}", n.close_brace); + } + + // If this is an expression, finally actually invoke the lambda + if (is_expression) { + printer.print_cpp2("()", n.close_brace); + } + } + + + //----------------------------------------------------------------------- + // + auto emit(selection_statement_node const& n) + -> void + { STACKINSTR + assert(n.identifier); + emit(*n.identifier); + + if (n.is_constexpr) { + printer.print_cpp2(" constexpr", n.position()); + } + + printer.print_cpp2(" (", n.position()); + printer.add_pad_in_this_line(1); + + assert(n.expression); + emit(*n.expression); + + printer.print_cpp2(") ", n.position()); + printer.add_pad_in_this_line(1); + + assert(n.true_branch); + emit(*n.true_branch); + + if (n.has_source_false_branch) { + printer.print_cpp2("else ", n.else_pos); + emit(*n.false_branch); + } + } + + + //----------------------------------------------------------------------- + // + auto emit(iteration_statement_node const& n) + -> void + { STACKINSTR + assert(n.identifier); + in_non_rvalue_context.push_back(true); + auto guard = finally([&]{ in_non_rvalue_context.pop_back(); }); + + iteration_statements.push_back({ &n, false}); + auto labelname = labelized_position(n.label); + + // Handle while + // + if (*n.identifier == "while") { + assert( + n.condition + && n.statements + && !n.range + && !n.body + ); + + // We emit Cpp2 while loops as Cpp2 for loops if there's a "next" 
clause + if (!n.next_expression) { + printer.print_cpp2("while( ", n.position()); + emit(*n.condition); + } + else { + printer.print_cpp2("for( ; ", n.position()); + emit(*n.condition); + printer.print_cpp2("; ", n.position()); + printer.add_pad_in_this_line(-10); + emit(*n.next_expression); + } + printer.print_cpp2(" ) ", n.position()); + if (!labelname.empty()) { + printer.print_extra("{"); + } + emit(*n.statements); + if (!labelname.empty()) { + printer.print_extra(" CPP2_CONTINUE_BREAK("+labelname+") }"); + } + } + + // Handle do + // + else if (*n.identifier == "do") { + assert( + n.condition + && n.statements + && !n.range + && !n.body + ); + + printer.print_cpp2("do ", n.position()); + if (!labelname.empty()) { + printer.print_extra("{"); + } + emit(*n.statements); + if (!labelname.empty()) { + printer.print_extra(" CPP2_CONTINUE_BREAK("+labelname+") }"); + } + printer.print_cpp2(" while ( ", n.position()); + if (n.next_expression) { + // Gotta say, this feels kind of nifty... short-circuit eval + // and smuggling work into a condition via a lambda, O my... 
+ printer.print_cpp2("[&]{ ", n.position()); + emit(*n.next_expression); + printer.print_cpp2(" ; return true; }() && ", n.position()); + } + emit(*n.condition); + printer.print_cpp2(");", n.position()); + } + + // Handle for + // + else if (*n.identifier == "for") { + assert( + !n.condition + && !n.statements + && n.range + && n.parameter + && n.body + ); + + // Note: This used to emit cpp2_range as a range-for-loop scope variable, + // but some major compilers seem to have random troubles with that; + // the workaround to avoid their bugs for now is to emit a { } block + // around the Cpp1 range-for and make the scope variable a normal local + + printer.print_cpp2("for ( ", n.position()); + + emit(*n.parameter); + + printer.print_cpp2(" : ", n.position()); + + // If this expression is just a single expression-list, we can + // take over direct control of emitting it without needing to + // go through the whole grammar, and surround it with braces + if (n.range->is_expression_list()) { + printer.print_cpp2( "{ ", n.position() ); + emit(*n.range->get_expression_list(), false); + printer.print_cpp2( " }", n.position() ); + } + // Otherwise, just emit the general expression as usual + else { + emit(*n.range); + } + + printer.print_cpp2(" ) ", n.position()); + if (!labelname.empty()) { + printer.print_extra("{"); + } + + // If there's a next-expression, smuggle it in via a nested do/while(false) loop + // (nested "continue" will work, but "break" won't until we do extra work to implement + // that using a flag and implementing "break" as "_for_break = true; continue;") + if (n.next_expression) { + printer.print_cpp2(" { do ", n.position()); + } + + assert(n.body); + emit(*n.body); + + if (n.next_expression) { + printer.print_cpp2(" while (false); ", n.position()); + emit(*n.next_expression); + printer.print_cpp2("; }", n.position()); + } + + printer.print_cpp2("", n.position()); + if (!labelname.empty()) { + printer.print_extra(" CPP2_CONTINUE_BREAK("+labelname+") 
}"); + } + } + + else { + assert(!"ICE: unexpected case"); + } + + assert (iteration_statements.back().stmt); + if ( + iteration_statements.back().stmt->label + && !iteration_statements.back().used + ) + { + auto name = iteration_statements.back().stmt->label->to_string(); + errors.emplace_back( + iteration_statements.back().stmt->position(), + name + ": a named loop must have its name used (did you forget 'break " + name + ";' or 'continue " + name + "';?)" + ); + } + + iteration_statements.pop_back(); + } + + + //----------------------------------------------------------------------- + // + auto emit(return_statement_node const& n) + -> void + { STACKINSTR + assert (!current_functions.empty()); + if (current_functions.back().func->has_postconditions()) { + printer.print_cpp2( "cpp2_finally_presuccess.run(); ", n.position() ); + } + + assert(n.identifier); + assert(*n.identifier == "return"); + printer.print_cpp2("return ", n.position()); + + // Return with expression == single anonymous return type + // + if (n.expression) + { + assert(!current_functions.empty()); + auto is_forward_return = + !function_returns.empty() + && function_returns.back().pass == passing_style::forward; + auto is_deduced_return = + !function_returns.empty() + && function_returns.back().is_deduced; + + // If we're doing a forward return of a single-token name + if (auto tok = n.expression->expr->get_postfix_expression_node()->expr->get_token(); + tok + && is_forward_return + ) + { + // Ensure we're not returning a local or an in/move parameter + auto is_parameter_name = current_functions.back().decl->has_parameter_named(*tok); + if ( + is_parameter_name + && ( + current_functions.back().decl->has_in_parameter_named(*tok) + || current_functions.back().decl->has_move_parameter_named(*tok) + ) + ) + { + errors.emplace_back( + n.position(), + "a 'forward' return type cannot return an 'in' or 'move' parameter" + ); + return; + } + else if ( + !is_parameter_name + && 
sema.get_declaration_of(*tok) + ) + { + errors.emplace_back( + n.position(), + "a 'forward' return type cannot return a local variable" + ); + return; + } else if ( + is_literal(tok->type()) || n.expression->expr->is_result_a_temporary_variable() + ) + { + errors.emplace_back( + n.position(), + "a 'forward' return type cannot return a temporary variable" + ); + return; + } + } + + // If this expression is just a single expression-list, we can + // take over direct control of emitting it without needing to + // go through the whole grammar, and surround it with braces + if (n.expression->is_expression_list()) { + if (!is_deduced_return) { + printer.print_cpp2( "{ ", n.position() ); + } + emit(*n.expression->get_expression_list(), false); + if (!is_deduced_return) { + printer.print_cpp2( " }", n.position() ); + } + } + // Otherwise, just emit the general expression as usual + else { + emit(*n.expression); + } + + if ( + function_returns.empty() + || function_returns.back().param_list != &single_anon + ) + { + errors.emplace_back( + n.position(), + "return statement with expression must be in a function with a single anonymous return value" + ); + return; + } + } + + else if ( + !function_returns.empty() + && function_returns.back().param_list == &single_anon + ) + { + errors.emplace_back( + n.position(), + "return statement must have an expression in a function with a single anonymous return value" + ); + } + + // Return without expression, could be assignment operator + // + else if (generating_assignment_from == current_functions.back().decl) + { + printer.print_cpp2("*this", n.position()); + } + + // Otherwise, zero or named return values + // + else if ( + !function_returns.empty() + && function_returns.back().param_list + ) + { + auto& parameters = function_returns.back().param_list->parameters; + + auto stmt = std::string{}; + + // Put braces only around multiple named returns, which are a struct + // - single named returns are emitted as ordinary returns, and 
extra + // { } would be legal but generate a noisy warning on some compilers + if (std::ssize(parameters) > 1) { + stmt += std::string(" { "); + } + + for (bool first = true; auto& param : parameters) { + if (!first) { + stmt += ", "; + } + first = false; + assert(param->declaration->identifier); + + printer.emit_to_string(&stmt); + emit(*param->declaration->identifier, true); + printer.emit_to_string(); + } + + if (std::ssize(parameters) > 1) { + stmt += std::string(" }"); + } + + printer.print_cpp2(stmt, n.position()); + } + + printer.print_cpp2("; ", n.position()); + } + + + //----------------------------------------------------------------------- + // + auto emit(jump_statement_node const& n) + -> void + { STACKINSTR + assert(n.keyword); + + if (n.label) { + auto iter_stmt = + std::find_if( + iteration_statements.begin(), + iteration_statements.end(), + [&](auto& s){ + assert(s.stmt); + return + s.stmt->label + && std::string_view{*s.stmt->label} == std::string_view{*n.label} + ; + } + ); + if (iter_stmt == iteration_statements.end()) + { + errors.emplace_back( + n.position(), + "a named " + n.keyword->to_string() + " must use the name of an enclosing local loop label" + ); + return; + } + iter_stmt->used = true; + assert((*iter_stmt).stmt->label); + printer.print_cpp2( + "goto " + to_upper_and_underbar(*n.keyword) + "_" + labelized_position((*iter_stmt).stmt->label) + ";", + n.position() + ); + } + else { + emit(*n.keyword); + printer.print_cpp2(";", n.position()); + } + } + + + //----------------------------------------------------------------------- + // + auto emit(using_statement_node const& n) + -> void + { STACKINSTR + assert(n.keyword); + emit(*n.keyword); + + if (n.for_namespace) { + printer.print_cpp2(" namespace", n.position()); + } else { + current_names.push_back(active_using_declaration{n}); + } + + printer.print_cpp2(" " + print_to_string(*n.id) + ";", n.position()); + } + + + 
//----------------------------------------------------------------------- + // + auto build_capture_lambda_intro_for( + capture_group& captures, + source_position pos, + bool include_default_reference_capture = false + ) + -> std::string + { + // First calculate the stringized version of each capture expression + // This will let us compare and de-duplicate repeated capture expressions + for (auto& cap : captures.members) + { + assert(cap.capture_expr->cap_grp == &captures); + if (cap.str.empty()) { + print_to_string(&cap.str, *cap.capture_expr, true); + suppress_move_from_last_use = true; + print_to_string(&cap.str_suppressed_move, *cap.capture_expr, true); + suppress_move_from_last_use = false; + } + } + + // If move from last use was used on the variable we need to rewrite the str to add std::move + // to earlier use of the variable. That will save us from capturing one variable two times + // (one with copy and one with std::move). + for (auto rit = captures.members.rbegin(); rit != captures.members.rend(); ++rit) + { + auto is_same_str_suppressed_move = [s=rit->str_suppressed_move](auto& cap){ + return cap.str_suppressed_move == s; + }; + + auto rit2 = std::find_if(rit+1, captures.members.rend(), is_same_str_suppressed_move); + while (rit2 != captures.members.rend()) + { + rit2->str = rit->str; + rit2 = std::find_if(rit2+1, captures.members.rend(), is_same_str_suppressed_move); + } + } + + // Then build the capture list, ignoring duplicated expressions + auto lambda_intro = std::string("["); + auto num_captures = 0; + + if ( + (!current_functions.empty() + && current_functions.back().decl->is_function_with_this() + && !current_functions.back().decl->parent_is_namespace() + ) + || include_default_reference_capture + ) + { + // Note: & is needed (when allowed, not at namespace scope) because a + // nested UFCS might be viewed as trying to capture 'this' + lambda_intro += "&"; + ++num_captures; + } + + printer.emit_to_string(&lambda_intro); + + auto handled = 
std::vector<std::string>{}; + for (auto& cap : captures.members) + { + // If we haven't handled a capture that looks like this one + if (std::find(handled.begin(), handled.end(), cap.str) == handled.end()) + { + // Remember it + handled.push_back(cap.str); + + // And handle it + if (num_captures != 0) { // not first + lambda_intro += ", "; + } + cap.cap_sym = "_"+std::to_string(num_captures); + printer.print_cpp2(cap.cap_sym + " = " + cap.str, pos); + } + ++num_captures; + } + printer.emit_to_string(); + lambda_intro += "]"; + + return lambda_intro; + } + + + //----------------------------------------------------------------------- + // + auto emit(primary_expression_node const& n) + -> void + { STACKINSTR + try_emit<primary_expression_node::identifier >(n.expr); + try_emit<primary_expression_node::expression_list>(n.expr); + try_emit<primary_expression_node::id_expression >(n.expr); + try_emit<primary_expression_node::inspect >(n.expr, true); + try_emit<primary_expression_node::literal >(n.expr); + + if (n.expr.index() == primary_expression_node::declaration) + { + // This must be an anonymous declaration + auto& decl = std::get<primary_expression_node::declaration>(n.expr); + assert( + decl + && !decl->identifier + ); + + // Handle an anonymous function + if (decl->is_function()) { + auto lambda_intro = build_capture_lambda_intro_for(decl->captures, n.position()); + + // Handle an anonymous generic function with explicit type list + if (decl->template_parameters) { + print_to_string(&lambda_intro, *decl->template_parameters, false, true); + } + + emit(*decl, lambda_intro); + } + // Else an anonymous object as 'typeid { initializer }' + else { + assert(decl->is_object()); + auto& type_id = std::get<declaration_node::an_object>(decl->type); + + printer.add_pad_in_this_line( -5 ); + + emit(*type_id); + printer.print_cpp2("{", decl->position()); + + if (!decl->initializer) { + errors.emplace_back( + decl->position(), + "an anonymous object declaration must have '=' 
and an initializer" + ); + return; + } + + emit(*decl->initializer, false); + + printer.print_cpp2("}", decl->position()); + } + } + } + + // Not yet implemented. TODO: finalize deducing pointer types from parameter lists + auto is_pointer_declaration( + parameter_declaration_list_node const*, + int, + int + ) + -> bool + { + return false; + } + + auto is_pointer_declaration( + declaration_node const* decl_node, + int deref_cnt, + int addr_cnt + ) + -> bool + { + if (!decl_node) { + return false; + } + if (addr_cnt > deref_cnt) { + return true; + } + + return std::visit([&](auto const& type){ + return is_pointer_declaration(type.get(), deref_cnt, addr_cnt); + }, decl_node->type); + } + + auto is_pointer_declaration( + function_type_node const* fun_node, + int deref_cnt, + int addr_cnt + ) + -> bool + { + if (!fun_node) { + return false; + } + if (addr_cnt > deref_cnt) { + return true; + } + + return std::visit([&]<typename T>(T const& type){ + if constexpr (std::is_same_v<T, std::monostate>) { + return false; + } + else if constexpr (std::is_same_v<T, function_type_node::single_type_id>) { + return is_pointer_declaration(type.type.get(), deref_cnt, addr_cnt); + } + else { + return is_pointer_declaration(type.get(), deref_cnt, addr_cnt); + } + }, fun_node->returns); + } + + auto is_pointer_declaration( + type_id_node const* type_id_node, + int deref_cnt, + int addr_cnt + ) + -> bool + { + if (!type_id_node) { + return false; + } + if (addr_cnt > deref_cnt) { + return true; + } + + if ( type_id_node->dereference_of ) { + return is_pointer_declaration(type_id_node->dereference_of, deref_cnt + type_id_node->dereference_cnt, addr_cnt); + } else if ( type_id_node->address_of ) { + return is_pointer_declaration(type_id_node->address_of, deref_cnt, addr_cnt + 1); + } + + int pointer_declarators_cnt = std::count_if(std::cbegin(type_id_node->pc_qualifiers), std::cend(type_id_node->pc_qualifiers), [](auto* q) { + return q->type() == lexeme::Multiply; + }); + + if ( + 
pointer_declarators_cnt == 0 + && type_id_node->suspicious_initialization + ) + { + return is_pointer_declaration(type_id_node->suspicious_initialization, deref_cnt, addr_cnt); + } + + return (pointer_declarators_cnt + addr_cnt - deref_cnt) > 0; + } + + auto is_pointer_declaration( + type_node const*, + int, + int + ) + -> bool + { + return false; + } + + auto is_pointer_declaration( + namespace_node const*, + int, + int + ) + -> bool + { + return false; + } + + auto is_pointer_declaration( + alias_node const*, + int, + int + ) + -> bool + { + return false; + } + + auto is_pointer_declaration( + declaration_sym const* decl, + int deref_cnt, + int addr_cnt + ) + -> bool + { + if (!decl) { + return false; + } + if (addr_cnt > deref_cnt) { + return true; + } + return is_pointer_declaration(decl->declaration, deref_cnt, addr_cnt); + } + + auto is_pointer_declaration( + token const* t, + int deref_cnt = 0, + int addr_cnt = 0 + ) + -> bool + { + if (!t) { + return false; + } + if (addr_cnt > deref_cnt) { + return true; + } + auto decl = sema.get_declaration_of(*t, true); + return is_pointer_declaration(decl, deref_cnt, addr_cnt); + } + + + auto source_order_name_lookup(unqualified_id_node const& id) + -> source_order_name_lookup_res + { + for ( + auto first = current_names.rbegin(), last = current_names.rend() - 1; + first != last; + ++first + ) + { + if ( + auto decl = get_if<declaration_node const*>(&*first); + decl + && *decl + && (*decl)->has_name(*id.identifier) + ) + { + return *decl; + } + else if ( + auto using_ = get_if<active_using_declaration>(&*first); + using_ + && using_->identifier + && *using_->identifier == *id.identifier + ) + { + return *using_; + } + } + + return {}; + } + + + auto lookup_finds_variable_with_placeholder_type_under_initialization(id_expression_node const& n) + -> bool + { + if (!n.is_unqualified()) + { + return false; + } + + auto const& id = *get<id_expression_node::unqualified>(n.id); + auto lookup = source_order_name_lookup(id); + + 
if ( + !lookup + || get_if<active_using_declaration>(&*lookup) + ) + { + return false; + } + + auto decl = get<declaration_node const*>(*lookup); + if ( + decl + && decl->has_name(*id.identifier) + ) + { + if ( + !decl->is_object() + && !decl->is_object_alias() + ) + { + return false; + } + + if (decl->is_object()) { + auto type = &**get_if<declaration_node::an_object>(&decl->type); + return type->is_wildcard() + && contains(current_declarations, decl); + } + auto const& type = (**get_if<declaration_node::an_alias>(&decl->type)).type_id; + return ( + !type + || type->is_wildcard() + ) + && contains(current_declarations, decl); + } + + return false; + } + + //----------------------------------------------------------------------- + // + auto emit( + // Note: parameter is not const as we'll fill in the capture .str info + postfix_expression_node& n, + bool for_lambda_capture = false + ) + -> void + { STACKINSTR + if (!sema.check(n)) { + return; + } + + assert(n.expr); + last_postfix_expr_was_pointer = false; + + // For a 'move that' parameter, track the members we already moved from + // so we can diagnose attempts to move from the same member twice + if ( + emitting_move_that_function + && n.expr->get_token() + && *n.expr->get_token() == "that" + ) + { + if (n.ops.empty()) { + if (!already_moved_that_members.empty()) { + errors.emplace_back( + n.position(), + "attempting to move from whole 'that' object after a 'that.member' was already moved from" + ); + return; + } + // push a sentinel for "all members" + already_moved_that_members.push_back(nullptr); + } + else { + auto member = n.ops[0].id_expr->get_token(); + assert(member); + + for ( + auto i = already_moved_that_members.begin(); + i != already_moved_that_members.end(); + ++i + ) + { + if ( + !*i + || **i == *member + ) + { + errors.emplace_back( + n.position(), + "attempting to move twice from 'that." 
+ member->to_string() + "'" + ); + return; + } + } + + already_moved_that_members.push_back(member); + } + } + + // Ensure that forwarding postfix-expressions start with a forwarded parameter name + // + assert (!current_args.empty()); + if (current_args.back().pass == passing_style::forward) + { + assert (n.expr->get_token()); + assert (!current_args.back().ptoken); + current_args.back().ptoken = n.expr->get_token(); + auto decl = sema.get_declaration_of(*current_args.back().ptoken); + if (!(decl && decl->parameter && decl->parameter->pass == passing_style::forward)) + { + errors.emplace_back( + n.position(), + n.expr->get_token()->to_string() + " is not a forwarding parameter name" + ); + } + } + + // Check that this isn't pointer arithmentic + // (initial partial implementation) + if (n.expr->expr.index() == primary_expression_node::id_expression) + { + auto& id = std::get<primary_expression_node::id_expression>(n.expr->expr); + assert(id); + if (id->id.index() == id_expression_node::unqualified) + { + auto& unqual = std::get<id_expression_node::unqualified>(id->id); + assert(unqual); + // TODO: Generalize this: + // - we don't recognize pointer types from Cpp1 + // - we don't deduce pointer types from parameter_declaration_list_node + if ( is_pointer_declaration(unqual->identifier) ) { + if (n.ops.empty()) { + last_postfix_expr_was_pointer = true; + } + else + { + auto op = [&]{ + if ( + n.ops.size() >= 2 + && n.ops[0].op->type() == lexeme::LeftParen + ) + { + return n.ops[1].op; + } + else + { + return n.ops.front().op; + } + }(); + + if ( + op->type() == lexeme::PlusPlus + || op->type() == lexeme::MinusMinus + || op->type() == lexeme::LeftBracket + ) + { + errors.emplace_back( + op->position(), + op->to_string() + " - pointer arithmetic is illegal - use std::span or gsl::span instead" + ); + violates_bounds_safety = true; + } + else if ( + op->type() == lexeme::Tilde + ) + { + errors.emplace_back( + op->position(), + op->to_string() + " - pointer bitwise 
manipulation is illegal - use std::bit_cast to convert to raw bytes first" + ); + } + } + } + } + } + + // Simple case: If there are no .ops, just emit the expression + if (n.ops.empty()) { + emit(*n.expr); + return; + } + + // Check to see if it's a capture expression that contains $, + // and if we're not capturing the expression for the lambda + // introducer replace it with the capture name + auto captured_part = std::string{}; + if ( + n.cap_grp + && !for_lambda_capture + ) + { + // First stringize ourselves so that we compare equal against + // the first *cap_grp .str_suppressed_move that matches us (which is what the + // lambda introducer generator used to create a lambda capture) + suppress_move_from_last_use = true; + auto my_sym = print_to_string(n, true); + suppress_move_from_last_use = false; + + auto found = std::find_if(n.cap_grp->members.cbegin(), n.cap_grp->members.cend(), [my_sym](auto& cap) { + return cap.str_suppressed_move == my_sym; + }); + + assert( + found != n.cap_grp->members.cend() + && "ICE: could not find this postfix-expression in capture group" + ); + // And then emit that capture symbol with number + assert (!found->cap_sym.empty()); + captured_part += found->cap_sym; + } + + // Otherwise, we're going to have to potentially do some work to change + // some Cpp2 postfix operators to Cpp1 prefix operators, so let's set up... 
+ auto prefix = std::vector<text_with_pos>{}; + auto suffix = std::vector<text_with_pos>{}; + + auto last_was_prefixed = false; + auto saw_dollar = false; + + struct text_chunks_with_parens_position { + std::vector<text_with_pos> text_chunks; + source_position open_pos; + source_position close_pos; + }; + + auto args = std::optional<text_chunks_with_parens_position>{}; + + auto flush_args = [&] { + if (args) { + suffix.emplace_back(")", args.value().close_pos); + for (auto&& e: args.value().text_chunks) { + suffix.push_back(e); + } + suffix.emplace_back("(", args.value().open_pos); + args.reset(); + } + }; + + auto print_to_text_chunks = [&](auto& i, auto... more) { + auto text = std::vector<text_with_pos>{}; + printer.emit_to_text_chunks(&text); + push_need_expression_list_parens(false); + emit(i, more...); + pop_need_expression_list_parens(); + printer.emit_to_text_chunks(); + return text; + }; + + for (auto i = n.ops.rbegin(); i != n.ops.rend(); ++i) + { + assert(i->op); + + // If we already captured a part as a _## lambda capture, + // skip the part of this expression before the $ symbol + // + if (!captured_part.empty()) { + if (i->op->type() == lexeme::Dollar) { + break; + } + } + // Else skip the part of this expression after the $ symbol + else if (for_lambda_capture) { + if (i->op->type() == lexeme::Dollar) { + saw_dollar = true; + continue; + } + if (!saw_dollar) { + continue; + } + } + + // Going backwards if we found LeftParen it might be UFCS + // expr_list is emitted to 'args' for future use + if (i->op->type() == lexeme::LeftParen) { + + assert(i->op); + assert(i->op_close); + auto local_args = text_chunks_with_parens_position{{}, i->op->position(), i->op_close->position()}; + + assert (i->expr_list); + if (!i->expr_list->expressions.empty()) { + local_args.text_chunks = print_to_text_chunks(*i->expr_list); + } + + flush_args(); + args.emplace(std::move(local_args)); + } + // Going backwards if we found Dot and there is args variable + // it means 
that it should be handled by UFCS + else if( + i->op->type() == lexeme::Dot + && args + // Disable UFCS if name lookup would hard-error (#550). + // That happens when it finds that the function identifier being called is the name + // of a variable with deduced type and we are in its initializer (e.g., x := y.x();) + // So lower it to a member call instead, the only possible valid meaning. + && !lookup_finds_variable_with_placeholder_type_under_initialization(*i->id_expr) + ) + { + // The function name is the argument to the macro + auto funcname = print_to_string(*i->id_expr); + + // First, build the UFCS macro name + + auto ufcs_string = std::string("CPP2_UFCS"); + + // If there are template arguments, use the _TEMPLATE version + if (std::ssize(i->id_expr->template_arguments()) > 0) { + // If it is qualified, use the _QUALIFIED version + if (i->id_expr->is_qualified()) { + ufcs_string += "_QUALIFIED"; + // And split the unqualified id in the function name as two macro arguments + auto& id = *get<id_expression_node::qualified>(i->id_expr->id); + funcname = + "(" + + print_to_string(id, false) + + "::)," + + print_to_string(*cpp2::assert_not_null(id.ids.back().id), false, true, true); + } + ufcs_string += "_TEMPLATE"; + } + + // If we're in an object declaration (i.e., initializer) + // at namespace scope, use the _NONLOCAL version + // + // Note: If there are other cases where code could execute + // in a non-local scope where a capture-default for the UFCS + // lambda would not be allowed, then add them here + if ( + current_declarations.back()->is_namespace() + || ( + current_declarations.back()->is_object() + && current_declarations.back()->parent_is_namespace() + ) + || ( + ( + current_declarations.back()->is_alias() + || ( + current_declarations.back()->is_function() + && current_declarations.back() == having_signature_emitted + ) + ) + && ( + current_declarations.back()->parent_is_namespace() + || current_declarations.back()->parent_is_type() + ) + ) + ) + { 
+ ufcs_string += "_NONLOCAL"; + } + + // Second, emit the UFCS argument list + + prefix.emplace_back(ufcs_string + "(" + funcname + ")(", args.value().open_pos ); + suffix.emplace_back(")", args.value().close_pos ); + if (!args.value().text_chunks.empty()) { + for (auto&& e: args.value().text_chunks) { + suffix.push_back(e); + } + suffix.emplace_back(", ", i->op->position()); + } + args.reset(); + } + + // Handle the Cpp2 postfix operators that are prefix in Cpp1 + // + else if ( + i->op->type() == lexeme::MinusMinus + || i->op->type() == lexeme::PlusPlus + || i->op->type() == lexeme::Multiply + || i->op->type() == lexeme::Ampersand + || i->op->type() == lexeme::Tilde + ) + { + // omit some needless parens + if ( + !last_was_prefixed + && i != n.ops.rbegin() + ) + { + prefix.emplace_back( "(", i->op->position() ); + } + prefix.emplace_back( i->op->to_string(), i->op->position()); + + // Enable null dereference checks + if ( + flag_safe_null_pointers + && i->op->type() == lexeme::Multiply + ) + { + prefix.emplace_back( "cpp2::assert_not_null(", i->op->position() ); + } + if ( + flag_safe_null_pointers + && i->op->type() == lexeme::Multiply + ) + { + suffix.emplace_back( ")", i->op->position() ); + } + + // omit some needless parens + if ( + !last_was_prefixed + && i != n.ops.rbegin() + ) + { + suffix.emplace_back( ")", i->op->position() ); + } + last_was_prefixed = true; + } + + // Handle the other Cpp2 postfix operators that stay postfix in Cpp1 (currently: '...') + else if (is_postfix_operator(i->op->type())) { + flush_args(); + suffix.emplace_back( i->op->to_string(), i->op->position()); + } + + // Handle the suffix operators that remain suffix + // + else { + assert(i->op); + last_was_prefixed = false; + + // Enable subscript bounds checks + if ( + flag_safe_subscripts + && i->op->type() == lexeme::LeftBracket + && std::ssize(i->expr_list->expressions) == 1 + ) + { + suffix.emplace_back( ")", i->op->position() ); + } + else if (i->op_close) { + 
suffix.emplace_back( i->op_close->to_string(), i->op_close->position() ); + } + + if (i->id_expr) + { + if (args) { + // If args are stored it means that this is function or method + // that is not handled by UFCS and args need to be printed + suffix.emplace_back(")", args.value().close_pos); + for (auto&& e: args.value().text_chunks) { + suffix.push_back(e); + } + suffix.emplace_back("(", args.value().open_pos); + args.reset(); + } + + auto print = print_to_string(*i->id_expr, false /*not a local name*/); + suffix.emplace_back( print, i->id_expr->position() ); + } + + if (i->expr_list) { + auto text = print_to_text_chunks(*i->expr_list); + for (auto&& e: text) { + suffix.push_back(e); + } + } + + // Enable subscript bounds checks + if ( + flag_safe_subscripts + && i->op->type() == lexeme::LeftBracket + && std::ssize(i->expr_list->expressions) == 1 + ) + { + prefix.emplace_back( "CPP2_ASSERT_IN_BOUNDS(", i->op->position() ); + suffix.emplace_back( ", ", i->op->position() ); + } + else { + suffix.emplace_back( i->op->to_string(), i->op->position() ); + } + } + } + + // Print the prefixes (in forward order) + for (auto& e : prefix) { + printer.print_cpp2(e.text, n.position()); + } + + // If this is an --, ++, or &, don't add std::move on the lhs + // even if this is a definite last use (only do that when an rvalue is okay) + if ( + n.ops.front().op->type() == lexeme::MinusMinus + || n.ops.front().op->type() == lexeme::PlusPlus + || n.ops.front().op->type() == lexeme::Ampersand + ) + { + suppress_move_from_last_use = true; + } + + // Now print the core expression -- or the captured_part in its place + if (captured_part.empty()) { + emit(*n.expr); + } + else { + printer.print_cpp2(captured_part, n.position()); + } + suppress_move_from_last_use = false; + + flush_args(); + + // Print the suffixes (in reverse order) + while (!suffix.empty()) { + printer.print_cpp2(suffix.back().text, suffix.back().pos); + suffix.pop_back(); + } + } + + + 
//----------------------------------------------------------------------- + // + auto emit(prefix_expression_node const& n) + -> void + { STACKINSTR + auto suffix = std::string{}; + for (auto const& x : n.ops) { + assert(x); + if (x->type() == lexeme::Not) { + printer.print_cpp2("!(", n.position()); + printer.add_pad_in_this_line(-3); + suffix += ")"; + } + else { + printer.print_cpp2(*x, x->position()); + } + } + assert(n.expr); + push_need_expression_list_parens(true); + emit(*n.expr); + pop_need_expression_list_parens(); + printer.print_cpp2(suffix, n.position()); + } + + + //----------------------------------------------------------------------- + // + auto emit(is_as_expression_node const& n) + -> void + { STACKINSTR + std::string prefix = {}; + std::string suffix = {}; + + auto wildcard_found = false; + bool as_on_literal = false; + + assert( + n.expr + && n.expr->get_postfix_expression_node() + && n.expr->get_postfix_expression_node()->expr + ); + { + auto& p = n.expr->get_postfix_expression_node()->expr; + if (auto t = p->get_token(); + t + && is_literal(t->type()) + && t->type() != lexeme::StringLiteral + && t->type() != lexeme::FloatLiteral + && !std::get<primary_expression_node::literal>(p->expr)->user_defined_suffix + && std::ssize(n.ops) > 0 + && *n.ops[0].op == "as" + ) + { + as_on_literal = true; + } + } + + for ( + auto i = n.ops.rbegin(); + i != n.ops.rend(); + ++i + ) + { + // If it's "ISORAS type", emit "cpp2::ISORAS<type>(expr)" + if (i->type) + { + if (i->type->is_wildcard()) { + wildcard_found = true; + if (*i->op != "is") { + errors.emplace_back( + n.position(), + "'as _' wildcard is not allowed, specify a concrete target type instead" + ); + } + else if (std::ssize(n.ops) > 1) { + errors.emplace_back( + n.position(), + "an 'is _' wildcard may only be used on its own, not in a chain with other 'is'/'as' in the same subexpression" + ); + } + } + else { + auto op_name = i->op->to_string(); + if (op_name == "as") { + op_name = "as_"; // use the 
static_assert-checked 'as' by default... + } // we'll override this inside inspect-expressions + prefix += "cpp2::" + op_name + "<" + print_to_string(*i->type) + ">("; + suffix = ")" + suffix; + } + } + // Else it's "is value", emit "cpp2::is(expr, value)" + else + { + assert(i->expr); + prefix += "cpp2::" + i->op->to_string() + "("; + suffix = ", " + print_to_string(*i->expr) + ")" + suffix; + } + } + + if (as_on_literal) { + auto last_pos = prefix.rfind('>'); assert(last_pos != prefix.npos); + prefix.insert(last_pos, ", " + print_to_string(*n.expr)); + } + + printer.print_cpp2(prefix, n.position()); + if (wildcard_found) { + printer.print_cpp2("true", n.position()); + } + else if(!as_on_literal) { + emit(*n.expr); + } + printer.print_cpp2(suffix, n.position()); + } + + + //----------------------------------------------------------------------- + // + template< + String Name, + typename Term + > + auto emit(binary_expression_node<Name,Term> const& n) + -> void + { STACKINSTR + assert(n.expr); + assert( + n.terms.empty() + || n.terms.front().op + ); + + // If this is relational comparison + if ( + !n.terms.empty() + && ( + n.terms.front().op->type() == lexeme::Less + || n.terms.front().op->type() == lexeme::LessEq + || n.terms.front().op->type() == lexeme::Greater + || n.terms.front().op->type() == lexeme::GreaterEq + || n.terms.front().op->type() == lexeme::EqualComparison + || n.terms.front().op->type() == lexeme::NotEqualComparison + ) + ) + { + auto const& op = *n.terms.front().op; + + // If this is one (non-chained) comparison, just emit it directly + if (std::ssize(n.terms) < 2) + { + assert (std::ssize(n.terms) == 1); + + // emit < <= >= > as cmp_*(a,b) calls (if selected) + if (flag_safe_comparisons) { + switch (op.type()) { + break;case lexeme::Less: + printer.print_cpp2( "cpp2::cmp_less(", n.position()); + break;case lexeme::LessEq: + printer.print_cpp2( "cpp2::cmp_less_eq(", n.position()); + break;case lexeme::Greater: + printer.print_cpp2( 
"cpp2::cmp_greater(", n.position()); + break;case lexeme::GreaterEq: + printer.print_cpp2( "cpp2::cmp_greater_eq(", n.position()); + break;default: + ; + } + } + + emit(*n.expr); + + // emit == and != as infix a ? b operators (since we don't have + // any checking/instrumentation we want to do for those) + if (flag_safe_comparisons) { + switch (op.type()) { + break;case lexeme::EqualComparison: + case lexeme::NotEqualComparison: + printer.print_cpp2( " ", n.position() ); + emit(op); + printer.print_cpp2( " ", n.position() ); + break;default: + printer.print_cpp2( ",", n.position() ); + } + } + else { + emit(op); + } + + emit(*n.terms.front().expr); + + if (flag_safe_comparisons) { + switch (op.type()) { + break;case lexeme::Less: + case lexeme::LessEq: + case lexeme::Greater: + case lexeme::GreaterEq: + printer.print_cpp2( ")", n.position() ); + break;default: + ; + } + } + + return; + } + + // Else if this is a chained comparison, emit it as a lambda, + // to get single evaluation via the lambda capture + else + { + // To check for the valid chains: all </<=, all >/>=, or all == + auto found_lt = 0; // < and <= + auto found_gt = 0; // > and >= + auto found_eq = 0; // == + auto count = 0; + + auto const* lhs = n.expr.get(); + auto lhs_name = "_" + std::to_string(count); + + auto lambda_capture = lhs_name + " = " + print_to_string(*lhs); + auto lambda_body = std::string{}; + + for (auto const& term : n.terms) + { + assert( + term.op + && term.expr + ); + ++count; + auto rhs_name = "_" + std::to_string(count); + + // Not the first expression? 
Insert a "&&" + if (found_lt + found_gt + found_eq > 0) { + lambda_body += " && "; + } + + // Remember what we've seen + switch (term.op->type()) { + break;case lexeme::Less: + case lexeme::LessEq: + found_lt = 1; + break;case lexeme::Greater: + case lexeme::GreaterEq: + found_gt = 1; + break;case lexeme::EqualComparison: + found_eq = 1; + break;default: + ; + } + + // emit < <= >= > as cmp_*(a,b) calls (if selected) + if (flag_safe_comparisons) { + switch (term.op->type()) { + break;case lexeme::Less: + lambda_body += "cpp2::cmp_less("; + break;case lexeme::LessEq: + lambda_body += "cpp2::cmp_less_eq("; + break;case lexeme::Greater: + lambda_body += "cpp2::cmp_greater("; + break;case lexeme::GreaterEq: + lambda_body += "cpp2::cmp_greater_eq("; + break;default: + ; + } + } + + auto rhs_expr = print_to_string(*term.expr); + + lambda_body += lhs_name; + + // emit == and != as infix a ? b operators (since we don't have + // any checking/instrumentation we want to do for those) + if (flag_safe_comparisons) { + switch (term.op->type()) { + break;case lexeme::EqualComparison: + lambda_body += *term.op; + break;case lexeme::NotEqualComparison: + errors.emplace_back( + n.position(), + "!= comparisons cannot appear in a comparison chain (see https://wg21.link/p0893)" + ); + return; + break;default: + lambda_body += ","; + } + } + else { + lambda_body += *term.op; + } + + lambda_capture += ", " + rhs_name + " = " + rhs_expr; + lambda_body += rhs_name; + + lhs = term.expr.get(); + lhs_name = rhs_name; + + if (flag_safe_comparisons) { + switch (term.op->type()) { + break;case lexeme::Less: + case lexeme::LessEq: + case lexeme::Greater: + case lexeme::GreaterEq: + lambda_body += ")"; + break;default: + ; + } + } + } + + assert(found_lt + found_gt + found_eq > 0); + if (found_lt + found_gt + found_eq != 1) { + errors.emplace_back( + n.position(), + "a comparison chain must be all < and <=, all > and >=, or all == (see https://wg21.link/p0893)" + ); + return; + } + + 
printer.print_cpp2( "[" + lambda_capture + "]{ return " + lambda_body + "; }()", n.position()); + + return; + } + } + + // Else if this is an assignment expression, don't add std::move on the lhs + // even if this is a definite last use (only do that when an rvalue is okay) + if ( + !n.terms.empty() + && is_assignment_operator(n.terms.front().op->type()) + ) + { + suppress_move_from_last_use = true; + } + // If it's "_ =" then emit static_cast<void>() + bool emit_discard = false; + if ( + !n.terms.empty() + && n.terms.front().op->type() == lexeme::Assignment + && n.expr->get_postfix_expression_node() + && n.expr->get_postfix_expression_node()->get_first_token_ignoring_this() + && *n.expr->get_postfix_expression_node()->get_first_token_ignoring_this() == "_" + ) + { + printer.print_cpp2( "static_cast<void>(", n.position() ); + emit_discard = true; + } + else + { + emit(*n.expr); + } + suppress_move_from_last_use = false; + + // Check that this isn't an illegal pointer operation + // (initial partial implementation) + if ( + !n.terms.empty() + && last_postfix_expr_was_pointer + ) + { + auto rhs_post = n.get_second_postfix_expression_node(); + assert( + rhs_post + && rhs_post->expr + ); + auto rhs_tok = rhs_post->expr->get_token(); + if ( + is_assignment_operator(n.terms.front().op->type()) + && rhs_tok + && ( + *rhs_tok == "nullptr" + || is_digit((rhs_tok->as_string_view())[0]) + ) + ) + { + errors.emplace_back( + n.terms.front().op->position(), + n.terms.front().op->to_string() + " - pointer assignment from null or integer is illegal" + ); + violates_lifetime_safety = true; + } + else if ( + *n.terms.front().op == "+" + || *n.terms.front().op == "+=" + || *n.terms.front().op == "-" + || *n.terms.front().op == "-=" + ) + { + errors.emplace_back( + n.terms.front().op->position(), + n.terms.front().op->to_string() + " - pointer arithmetic is illegal - use std::span or gsl::span instead" + ); + violates_bounds_safety = true; + } + } + + auto first = true; + for (auto 
const& x : n.terms) { + assert(x.op); + assert(x.expr); + + // Normally we'll just emit the operator, but if this is an + // assignment that's a definite initialization, change it to + // a .construct() call + if ( + x.op->type() == lexeme::Assignment + && in_definite_init + ) + { + printer.print_cpp2( ".construct(", n.position() ); + emit(*x.expr); + printer.print_cpp2( ")", n.position() ); + } + else + { + // For the first operator only, if we are emitting a "_ =" discard + // then we don't need the = + if ( + !emit_discard + || !first + ) { + printer.print_cpp2(" ", n.position()); + emit(*x.op); + printer.print_cpp2(" ", n.position()); + } + + // When assigning a single expression-list, we can + // take over direct control of emitting it without needing to + // go through the whole grammar, and surround it with braces + if ( + x.op->type() == lexeme::Assignment + && x.expr->is_expression_list() + ) + { + printer.print_cpp2( "{ ", n.position() ); + emit(*x.expr->get_expression_list(), false); + printer.print_cpp2( " }", n.position() ); + } + // Otherwise, just emit the general expression as usual + else { + emit(*x.expr); + } + } + + first = false; + } + // Finish emitting the "_ =" discard. 
+ if (emit_discard) { + printer.print_cpp2( ")", n.position() ); + } + } + + + //----------------------------------------------------------------------- + // + auto emit(expression_node const& n) + -> void + { STACKINSTR + assert(n.expr); + push_need_expression_list_parens(true); + emit(*n.expr); + pop_need_expression_list_parens(); + } + + + //----------------------------------------------------------------------- + // + auto emit( + expression_list_node const& n, + bool parens_ok = true + ) + -> void + { STACKINSTR + auto add_parens = + should_add_expression_list_parens() + && !n.inside_initializer + && parens_ok + ; + add_parens |= + n.is_fold_expression() && + !(n.inside_initializer && current_declarations.back()->initializer->position() != n.open_paren->position()) + ; + if (add_parens) { + printer.print_cpp2( *n.open_paren, n.position()); + } + + auto first = true; + for (auto const& x : n.expressions) { + if (!first) { + printer.print_cpp2(", ", n.position()); + } + first = false; + auto is_out = false; + + if (x.pass != passing_style::in) { + assert( + x.pass == passing_style::out + || x.pass == passing_style::move + || x.pass == passing_style::forward + ); + if (x.pass == passing_style::out) { + is_out = true; + printer.print_cpp2("cpp2::out(&", n.position()); + } + else if (x.pass == passing_style::move) { + printer.print_cpp2("std::move(", n.position()); + } + } + + if (is_out) { + in_non_rvalue_context.push_back(true); + } + + assert(x.expr); + current_args.push_back( {x.pass} ); + emit(*x.expr); + current_args.pop_back(); + + if (is_out) { + in_non_rvalue_context.pop_back(); + } + + if ( + x.pass == passing_style::move + || x.pass == passing_style::out + ) + { + printer.print_cpp2(")", n.position()); + } + } + + if (add_parens) { + printer.print_cpp2( *n.close_paren, n.position()); + } + // We want to consume only one of these + consumed_expression_list_parens(); + } + + + //----------------------------------------------------------------------- + // + 
auto emit( + expression_statement_node const& n, + bool can_have_semicolon, + source_position function_body_start = {}, + bool function_void_ret = false, + function_prolog const& function_prolog = {}, + std::vector<std::string> const& function_epilog = {}, + bool emitted = false + ) + -> void + { STACKINSTR + assert(n.expr); + auto generating_return = false; + + if (function_body_start != source_position{}) { + emit_prolog_mem_inits(function_prolog, n.position().colno); + printer.print_cpp2(" { ", function_body_start); + emit_prolog_statements(function_prolog, n.position().colno); + if (!function_void_ret) { + printer.print_cpp2("return ", n.position()); + generating_return = true; + } + } + + if (!emitted) { + // When generating 'return' of a single expression-list, we can + // take over direct control of emitting it without needing to + // go through the whole grammar, and surround it with braces + if ( + generating_return + && n.expr->is_expression_list() + && !n.expr->get_expression_list()->is_fold_expression() + ) + { + auto is_deduced_return = + !function_returns.empty() + && function_returns.back().is_deduced; + + if (!is_deduced_return) { + printer.print_cpp2( "{ ", n.position() ); + } + emit(*n.expr->get_expression_list(), false); + if (!is_deduced_return) { + printer.print_cpp2( " }", n.position() ); + } + } + // Otherwise, just emit the general expression as usual + else { + emit(*n.expr); + } + if (can_have_semicolon) { + printer.print_cpp2(";", n.position()); + } + } + + if (function_body_start != source_position{}) { + emit_epilog_statements( function_epilog, n.position().colno); + printer.print_cpp2(" }", n.position()); + } + } + + + // Consider moving these `stack` functions to `common.h` to enable more general use. 
+ + template<typename T> + auto stack_value( + T& var, + std::type_identity_t<T> const& value + ) + -> auto + { + return finally([&var, old = std::exchange(var, value)]() { + var = old; + }); + } + + template<typename T> + auto stack_element( + std::vector<T>& cont, + std::type_identity_t<T> const& value + ) + -> auto + { + cont.push_back(value); + return finally([&]{ cont.pop_back(); }); + } + + template<typename T> + auto stack_size(std::vector<T>& cont) + -> auto + { + return finally([&, size = cont.size()]{ cont.resize(size); }); + } + + template<typename T> + auto stack_size_if( + std::vector<T>& cont, + bool cond + ) + -> std::optional<decltype(stack_size(cont))> + { + if (cond) { + return stack_size(cont); + } + return {}; + } + + //----------------------------------------------------------------------- + // + auto emit( + statement_node const& n, + bool can_have_semicolon = true, + source_position function_body_start = {}, + bool function_void_ret = false, + function_prolog const& function_prolog = {}, + std::vector<std::string> const& function_epilog = {} + ) + -> void + { STACKINSTR + if (!sema.check(n)) { + return; + } + + auto emit_parameters = + !n.emitted + && n.parameters + ; + + auto guard = stack_size_if(current_names, emit_parameters); + if (emit_parameters) { + printer.print_extra( "\n"); + printer.print_extra( "{"); + for (auto& param : n.parameters->parameters) { + printer.print_extra( "\n"); + printer.print_extra( print_to_string(*param) ); + } + } + + // Do expression statement case first... 
it's the most complex + // because it's used for single-statement function bodies + try_emit<statement_node::expression >( + n.statement, + can_have_semicolon, + function_body_start, + function_void_ret, + function_prolog, + function_epilog, + n.emitted + ); + + // Otherwise, skip this one if it was already handled earlier (i.e., a constructor member init) + if (n.emitted) { + return; + } + + printer.disable_indent_heuristic_for_next_text(); + + try_emit<statement_node::compound >(n.statement, function_prolog, function_epilog); + + // NOTE: Reset preemption here because + // - for compound statements written as "= { ... }", we want to keep the + // preempted position which moves the { to where the = was + // - but for other statement types, we want to get rid of any leftover + // preemption (ideally there wouldn't be any, but sometimes there is + // and it should not apply to what we're about to emit) + printer.preempt_position_push({}); + // This only has a whitespace effect in the generated Cpp1 code, but it's + // aesthetic and aesthetics are important in this case -- we want to keep + // the original source's personal whitespace formatting style as much as we can + + try_emit<statement_node::selection >(n.statement); + try_emit<statement_node::declaration>(n.statement); + try_emit<statement_node::return_ >(n.statement); + try_emit<statement_node::iteration >(n.statement); + try_emit<statement_node::using_ >(n.statement); + try_emit<statement_node::contract >(n.statement); + try_emit<statement_node::inspect >(n.statement, false); + try_emit<statement_node::jump >(n.statement); + + printer.preempt_position_pop(); + + if (emit_parameters) { + printer.print_extra( "\n"); + printer.print_extra( "}"); + } + } + + + //----------------------------------------------------------------------- + // Within a type scope implementation, disallow declaring a name that + // is the same as (i.e., shadows) a type scope name... 
this is a + // convenient place to check because we have the decls stack + // + auto check_shadowing_of_type_scope_names( + declaration_node const& decl + ) + -> bool + { + if ( + decl.has_name() // this is a named declaration + && !decl.has_name("this") // that's not 'this' + && !decl.parent_is_type() // and the type isn't the direct parent + && is_name_declared_in_current_type_scope(*decl.name()) + ) // and it shadows a name + { + errors.emplace_back( + decl.position(), + "a type's implementation may not declare a name that is the same as (i.e., shadows) a type scope name - for example, a type scope function's local variable may not have the same as one of the type's members" + ); + return false; + } + + return true; + } + + + //----------------------------------------------------------------------- + // + auto emit( + parameter_declaration_node const& n, + bool is_returns = false, + bool is_template_parameter = false + ) + -> void + { STACKINSTR + if (!sema.check(n)) { + return; + } + + // Can't declare functions as parameters -- only pointers to functions which are objects + assert( n.declaration ); + assert( !n.declaration->is_function() ); + + if (!check_shadowing_of_type_scope_names(*n.declaration)) { + return; + } + + assert( n.declaration->identifier ); + auto identifier = print_to_string( *n.declaration->identifier ); + auto identifier_pos = n.position(); + + if (n.mod == parameter_declaration_node::modifier::implicit) + { + assert(!current_functions.empty()); + if ( + n.pass != passing_style::out + || !current_functions.back().decl->has_name("operator=") + ) + { + errors.emplace_back( + n.position(), + "only an 'out this' parameter of an 'operator=' function may be declared implicit" + ); + } + } + + current_names.push_back(&*n.declaration); + + //----------------------------------------------------------------------- + // Skip 'this' parameters + + if (n.declaration->has_name("this")) + { + // Since we're skipping "out this," plus possibly "implicit " 
    //-----------------------------------------------------------------------
    //  Emits the Cpp1 text for a single parameter declaration
    //
    //  n                       the parameter to emit
    //  is_returns              true when 'n' is being emitted as a member of
    //                          a generated named-returns struct; in that mode
    //                          the type and name go through print_extra and
    //                          no passing-style wrapper/suffix is added
    //  is_template_parameter   true when 'n' is in a template parameter list
    //
    //  Side effects visible in this function: may append to 'errors',
    //  'current_names', 'function_requires_conditions', and the prolog
    //  statements of current_functions.back(); may set emitting_that_function
    //
    auto emit(
        parameter_declaration_node const& n,
        bool is_returns = false,
        bool is_template_parameter = false
    )
        -> void
    {   STACKINSTR
        if (!sema.check(n)) {
            return;
        }

        //  Can't declare functions as parameters -- only pointers to functions which are objects
        assert( n.declaration );
        assert( !n.declaration->is_function() );

        if (!check_shadowing_of_type_scope_names(*n.declaration)) {
            return;
        }

        assert( n.declaration->identifier );
        auto identifier = print_to_string( *n.declaration->identifier );
        auto identifier_pos = n.position();

        //  'implicit' is only accepted on an 'out' parameter of 'operator='
        if (n.mod == parameter_declaration_node::modifier::implicit)
        {
            assert(!current_functions.empty());
            if (
                n.pass != passing_style::out
                || !current_functions.back().decl->has_name("operator=")
                )
            {
                errors.emplace_back(
                    n.position(),
                    "only an 'out this' parameter of an 'operator=' function may be declared implicit"
                );
            }
        }

        current_names.push_back(&*n.declaration);

        //-----------------------------------------------------------------------
        //  Skip 'this' parameters

        if (n.declaration->has_name("this"))
        {
            //  Since we're skipping "out this," plus possibly "implicit " and
            //  whitespace, any following parameters on the same line can shift left
            //  NOTE(review): -18 presumably approximates the width of the
            //  skipped text -- confirm against the printer's padding logic
            printer.add_pad_in_this_line(-18);

            return;
        }

        //-----------------------------------------------------------------------
        //  Handle 'that' parameters

        if (n.declaration->has_name("that"))
        {
            emitting_that_function = true;
            assert(
                n.pass == passing_style::in
                || n.pass == passing_style::move
            );
            //  'that' is emitted as 'T const&' for a copy and 'T&&' for a move
            auto pass = std::string{" const&"};
            if (
                n.pass == passing_style::move
                || emitting_move_that_function
                )
            {
                pass = "&&";
            }

            //  func_name is only used by the assertion below
            auto func_name = get_enclosing_function_name();
            assert(func_name);

            auto type_name = get_enclosing_type_name();
            assert(type_name);

            //  If we're in an empty type that has no member object, mark 'that' as
            //  [[maybe_unused]] to silence Cpp1 compiler warnings
            assert(!current_functions.empty());
            auto maybe_unused = std::string{};
            if (current_functions.back().decl->get_parent()->get_type_scope_declarations(declaration_node::objects).empty()) {
                maybe_unused = "[[maybe_unused]] ";
            }

            printer.print_cpp2(
                maybe_unused + print_to_string( *type_name ) + pass + " that",
                n.position()
            );
            return;
        }

        //-----------------------------------------------------------------------
        //  Handle type parameters

        if (n.declaration->is_type()) {
            assert( is_template_parameter );
            printer.print_cpp2("typename ", identifier_pos);
            if (n.declaration->is_variadic) {
                printer.print_cpp2(
                    "...",
                    identifier_pos
                );
            }

            //  An unnamed ('_') type parameter gets a name generated from its ordinal
            if (identifier == "_") {
                printer.print_cpp2( "UnnamedTypeParam" + std::to_string(n.ordinal), identifier_pos );
            }
            else {
                printer.print_cpp2( identifier, identifier_pos );
            }

            return;
        }

        //-----------------------------------------------------------------------
        //  Else handle template non-type parameters

        assert( n.declaration->is_object() );
        auto const& type_id = *std::get<declaration_node::an_object>(n.declaration->type);

        if (is_template_parameter) {
            emit( type_id );
            printer.print_cpp2(" ", type_id.position());
            printer.print_cpp2( identifier, identifier_pos );
            return;
        }

        //-----------------------------------------------------------------------
        //  Else handle ordinary parameters

        auto param_type = print_to_string(type_id);

        //  If there are template parameters on this function or its enclosing
        //  type, see if this parameter's name is an unqualified-id with a
        //  template parameter name, or mentions a template parameter as a
        //  template argument
        auto has_template_parameter_type_named = [](
            declaration_node const& decl,
            std::string_view name
        )
            -> bool
        {
            if (decl.template_parameters) {
                for (auto& tparam : decl.template_parameters->parameters)
                {
                    assert(
                        tparam
                        && tparam->name()
                    );
                    //  For now just do a quick string match
                    //  (exact match, or the name appearing right after '<' or ',')
                    auto tparam_name = tparam->name()->to_string();
                    if (
                        tparam->declaration->is_type()
                        && (
                            name == tparam_name
                            || name.find("<"+tparam_name) != std::string_view::npos
                            || name.find(","+tparam_name) != std::string_view::npos
                            )
                        )
                    {
                        return true;
                    }
                }
            }
            return false;
        };

        //  The type is dependent if it names a template parameter of the
        //  current declaration, or -- for an 'operator=' at type scope -- a
        //  template parameter of the parent declaration
        assert( current_declarations.back() );
        auto is_dependent_parameter_type =
            has_template_parameter_type_named( *current_declarations.back(), param_type )
            || (
                current_declarations.back()->parent_is_type()
                && current_declarations.back()->has_name("operator=")
                && has_template_parameter_type_named( *current_declarations.back()->get_parent(), param_type)
                )
            ;

        //  First any prefix

        if (identifier == "_") {
            printer.print_cpp2( "[[maybe_unused]] ", identifier_pos );
            identifier = "unnamed_param_" + std::to_string(n.ordinal);
        }

        //  Only concrete, non-variadic, non-pointer parameter types get the
        //  cpp2::in<> / cpp2::out<> wrapper; the matching '>' is printed by
        //  the suffix switch further below under the same conditions
        if (
            !is_returns
            && !n.declaration->is_variadic
            && !type_id.is_wildcard()
            && !is_dependent_parameter_type
            && !type_id.is_pointer_qualified()
            )
        {
            switch (n.pass) {
            break;case passing_style::in     : printer.print_cpp2( "cpp2::in<",  n.position() );
            break;case passing_style::out    : printer.print_cpp2( "cpp2::out<", n.position() );
            break;default: ;
            }
        }

        printer.preempt_position_push( n.position() );

        if (
            type_id.is_pointer_qualified()
            && n.pass == passing_style::in
            )
        {
            printer.print_cpp2( param_type, n.position() );
        }
        else if (
            type_id.is_wildcard()
            || is_dependent_parameter_type
            || n.declaration->is_variadic
            )
        {
            //  Generic case: the emitted type is 'auto', or the dependent
            //  type's own spelling
            auto name = std::string{"auto"};
            if (is_dependent_parameter_type) {
                name = param_type;
            }
            else if (
                n.declaration->is_variadic
                && !type_id.is_wildcard()
                )
            {
                //  Note: this inner 'name' (the parameter's token) shadows the
                //  outer 'name' string; the outer one stays "auto" here. A typed
                //  variadic pack is constrained via a requires-condition instead
                auto name = n.declaration->identifier->get_token();
                assert(name);
                auto req = std::string{"(std::is_convertible_v<CPP2_TYPEOF("};
                req += *name;
                req += "), ";
                req += param_type;
                req += "> && ...)";
                function_requires_conditions.push_back(req);
            }

            switch (n.pass) {
            break;case passing_style::in     : printer.print_cpp2( name+" const&", n.position() );
            break;case passing_style::copy   : printer.print_cpp2( name,           n.position() );
            break;case passing_style::inout  : printer.print_cpp2( name+"&",       n.position() );

            //  For generic out parameters, we take a pointer to anything with parameter named "identifier_"
            //  and then generate the out<> as a stack local with the expected name "identifier"
            break;case passing_style::out    : printer.print_cpp2( name,           n.position() );
                                               current_functions.back().prolog.statements.push_back(
                                                   "auto " + identifier + " = cpp2::out(" + identifier + "_); "
                                               );
                                               identifier += "_";

            break;case passing_style::move   : printer.print_cpp2( name+"&&",      n.position() );
            break;case passing_style::forward: printer.print_cpp2( name+"&&",      n.position() );
            break;default: ;
            }
        }
        else if (n.pass == passing_style::forward) {
            //  A 'forward' parameter of concrete type is emitted as 'auto' here
            //  (the suffix switch below appends "&&") and is constrained to the
            //  declared type through a requires-condition
            printer.print_cpp2("auto", n.position());

            auto name = n.declaration->identifier->get_token();
            assert(name);
            auto req = std::string{"std::is_same_v<CPP2_TYPEOF("};
            req += *name;
            req += "), ";
            req += param_type;
            req += ">";
            function_requires_conditions.push_back(req);
        }
        else {
            if (is_returns) {
                printer.print_extra( param_type );
            }
            else {
                printer.print_cpp2( param_type, type_id.position() );
            }
        }

        printer.preempt_position_pop();

        //  Then any suffix

        if (
            !is_returns
            && !type_id.is_wildcard()
            && !is_dependent_parameter_type
            && !type_id.is_pointer_qualified()
            && !n.declaration->is_variadic
            )
        {
            switch (n.pass) {
            break;case passing_style::in     : printer.print_cpp2( ">",  n.position() );
            break;case passing_style::copy   : printer.print_cpp2( "",   n.position() );
            break;case passing_style::inout  : printer.print_cpp2( "&",  n.position() );
            break;case passing_style::out    : printer.print_cpp2( ">",  n.position() );
            break;case passing_style::move   : printer.print_cpp2( "&&", n.position() );
            break;case passing_style::forward: printer.print_cpp2( "&&", n.position() );
            break;default: ;
            }
        }

        //  Finally the parameter name (preceded by "..." for a variadic pack)
        if (is_returns) {
            printer.print_extra( " " + identifier );
        }
        else {
            printer.print_cpp2( " ", identifier_pos );
            if (n.declaration->is_variadic)
            {
                //  NOTE(review): this rejection happens after the type text has
                //  already been printed and, for a generic 'out', after a prolog
                //  statement has been queued above
                if (n.direction() == passing_style::out) {
                    errors.emplace_back(
                        n.declaration->position(),
                        "a variadic parameter cannot be 'out'"
                    );
                    return;
                }

                printer.print_cpp2(
                    "...",
                    identifier_pos
                );
            }
            printer.print_cpp2( identifier, identifier_pos );
        }

        //  Emit a default argument if there is one; the parameter's declaration
        //  is pushed onto current_declarations while the initializer is emitted
        if (
            !is_returns
            && n.declaration->initializer
            )
        {
            auto guard = stack_element(current_declarations, &*n.declaration);
            printer.print_cpp2( " = ", n.declaration->initializer->position() );
            emit(*n.declaration->initializer);
        }
    }
(is_returns) { + printer.print_extra( param_type ); + } + else { + printer.print_cpp2( param_type, type_id.position() ); + } + } + + printer.preempt_position_pop(); + + // Then any suffix + + if ( + !is_returns + && !type_id.is_wildcard() + && !is_dependent_parameter_type + && !type_id.is_pointer_qualified() + && !n.declaration->is_variadic + ) + { + switch (n.pass) { + break;case passing_style::in : printer.print_cpp2( ">", n.position() ); + break;case passing_style::copy : printer.print_cpp2( "", n.position() ); + break;case passing_style::inout : printer.print_cpp2( "&", n.position() ); + break;case passing_style::out : printer.print_cpp2( ">", n.position() ); + break;case passing_style::move : printer.print_cpp2( "&&", n.position() ); + break;case passing_style::forward: printer.print_cpp2( "&&", n.position() ); + break;default: ; + } + } + + if (is_returns) { + printer.print_extra( " " + identifier ); + } + else { + printer.print_cpp2( " ", identifier_pos ); + if (n.declaration->is_variadic) + { + if (n.direction() == passing_style::out) { + errors.emplace_back( + n.declaration->position(), + "a variadic parameter cannot be 'out'" + ); + return; + } + + printer.print_cpp2( + "...", + identifier_pos + ); + } + printer.print_cpp2( identifier, identifier_pos ); + } + + if ( + !is_returns + && n.declaration->initializer + ) + { + auto guard = stack_element(current_declarations, &*n.declaration); + printer.print_cpp2( " = ", n.declaration->initializer->position() ); + emit(*n.declaration->initializer); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + parameter_declaration_list_node const& n, + bool is_returns = false, + bool is_template_parameter = false, + bool generating_postfix_inc_dec = false + ) + -> void + { STACKINSTR + in_parameter_list = true; + + if (is_returns) { + printer.print_extra( "{ " ); + } + else { + assert(n.open_paren); + emit(*n.open_paren); + } + + // So we don't get cute about 
text-aligning the first parameter when it's on a new line + printer.disable_indent_heuristic_for_next_text(); + + auto prev_pos = n.position(); + auto first = true; + for (auto const& x : n.parameters) { + if ( + !first + && !is_returns + ) + { + printer.print_cpp2( ", ", prev_pos ); + } + prev_pos = x->position(); + assert(x); + emit(*x, is_returns, is_template_parameter); + if (!x->declaration->has_name("this")) { + first = false; + } + if (is_returns) { + printer.print_extra( "; " ); + } + } + + if (is_returns) { + printer.print_extra( "};\n" ); + } + else { + // If we're generating Cpp1 postfix ++ or --, add the dummy int parameter + if (generating_postfix_inc_dec) { + if (!first) { + printer.print_cpp2( ",", n.position() ); + } + printer.print_cpp2( "int", n.position() ); + } + + // Position heuristic (aka hack): Avoid emitting extra whitespace before ) + // beyond column 10 + assert(n.close_paren); + auto col = std::min( n.close_paren->position().colno, colno_t{10}); + printer.preempt_position_push({ n.close_paren->position().lineno, col}); + emit(*n.close_paren); + printer.preempt_position_pop(); + } + + in_parameter_list = false; + } + + + //----------------------------------------------------------------------- + // + auto emit( + // note: parameter is deliberately not const because we will fill + // in the capture .str information + contract_node& n + ) + -> void + { STACKINSTR + assert (n.kind); + + // If this is one of Cpp2's predefined contract groups, + // make it convenient to use without cpp2:: qualification + auto name = std::string{"cpp2::Default"}; + if (n.group) + { + auto group = print_to_string(*n.group); + if (group != "_") { + name = group; + } + if ( + name == "Default" + || name == "Bounds" + || name == "Null" + || name == "Type" + || name == "Testing" + ) + { + name.insert(0, "cpp2::"); + } + } + + // "Unevaluated" is for static analysis only, and are never evaluated, so just skip them + // (The only requirement for an Unevaluated 
condition is that it parses; and even that's + // easy to relax if we ever want to allow arbitrary tokens in an Unevaluated condition) + if (n.group && n.group->to_string() == "Unevaluated") { + return; + } + + // For a postcondition, we'll wrap it in a lambda and register it + // + if (*n.kind == "post") { + auto lambda_intro = build_capture_lambda_intro_for(n.captures, n.position(), true); + printer.print_cpp2( + "cpp2_finally_presuccess.add(" + + lambda_intro + "{", + n.position() + ); + } + + // Emit the contract group name, and report any violation to that group + // + assert(n.condition); + auto message = std::string{"\"\""}; + if (n.message) { + message = "CPP2_CONTRACT_MSG(" + print_to_string(*n.message) + ")"; + } + + printer.print_cpp2( + "if (" + name + ".has_handler()", + n.position() + ); + for (auto const& flag : n.flags) { + printer.print_cpp2( + " && " + print_to_string(*flag), + n.position() + ); + } + printer.print_cpp2( + " && !(" + print_to_string(*n.condition) + ") ) " + + "{ " + name + ".report_violation(" + message + "); }", + n.position() + ); + + // For a postcondition, close out the lambda + // + if (*n.kind == "post") { + printer.print_cpp2( "} );", n.position() + ); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + function_type_node const& n, + token const* ident, + bool is_main = false, + bool is_ctor_or_dtor = false, + std::string suffix1 = {}, + bool generating_postfix_inc_dec = false + ) + -> void + { STACKINSTR + if (!sema.check(n)) { + return; + } + + if ( + is_main + && n.parameters->parameters.size() > 0 + ) + { + printer.print_cpp2( + "(int const argc_, char** argv_)", + n.parameters->position() + ); + current_functions.back().prolog.statements.push_back( + "auto const args = cpp2::make_args(argc_, argv_); " + ); + } + else { + emit(*n.parameters, false, false, generating_postfix_inc_dec); + } + + // For an anonymous function, the emitted lambda is 'constexpr' or 
'mutable' + if (!n.my_decl->has_name()) + { + if (n.my_decl->is_constexpr) { + // The current design path we're trying out is for all '==' functions to be + // emitted as Cpp1 'constexpr', including anonymous functions. For anonymous + // functions that have captures, the intent is that '==' implies "the result + // always the same (depends only on the arguments)." Specifically, the result + // doesn't depend on the captured state, so the captured state should be const. + // But until we want to take a dependency on post-C++20 constexpr relaxation + // to make more code work with 'constexpr' even when not invoked in constexpr + // contexts, we will emit it as const/whitespace instead for now. + // + // printer.print_cpp2( " constexpr", n.position() ); + // // consider enabling when P2242, P2280, and similar papers are widely implemented + } + else { + printer.print_cpp2( " mutable", n.position() ); + } + } + + // For now, adding implicit noexcept only for move/swap/dtor functions + if ( + n.is_move() + || n.is_swap() + || n.is_destructor() + || generating_move_from == n.my_decl + ) + { + printer.print_cpp2( " noexcept", n.position() ); + } + + printer.print_cpp2( suffix1, n.position() ); + + // Handle a special member function + if ( + n.is_assignment() + || generating_assignment_from == n.my_decl + ) + { + assert( + n.returns.index() == function_type_node::empty + && n.my_decl->parent_declaration->name() + ); + printer.print_cpp2( + " -> " + print_to_string( *n.my_decl->parent_declaration->name() ) + "& ", + n.position() + ); + } + + // Otherwise, handle a default return type + else if (n.returns.index() == function_type_node::empty) + { + if (is_main) + { + printer.print_cpp2( " -> int", n.position() ); + } + else if(!is_ctor_or_dtor) + { + printer.print_cpp2( " -> void", n.position() ); + } + } + + // Otherwise, handle a single anonymous return type + else if (n.returns.index() == function_type_node::id) + { + auto is_type_scope_function_with_in_this = + 
n.my_decl->parent_is_type() + && n.parameters->ssize() > 0 + && (*n.parameters)[0]->direction() == passing_style::in + ; + + printer.print_cpp2( " -> ", n.position() ); + auto& r = std::get<function_type_node::id>(n.returns); + assert(r.type); + + auto return_type = print_to_string(*r.type); + + if (r.pass == passing_style::forward) { + if (r.type->is_wildcard()) { + printer.print_cpp2( "auto&&", n.position() ); + } + else { + printer.print_cpp2( return_type, n.position() ); + if (is_type_scope_function_with_in_this) { + printer.print_cpp2( " const&", n.position() ); + } + else if (!generating_postfix_inc_dec) { + printer.print_cpp2( "&", n.position() ); + } + } + } + else { + printer.print_cpp2( return_type, n.position() ); + } + } + + // Otherwise, handle multiple/named returns + else { + printer.print_cpp2( " -> ", n.position() ); + function_return_name = {}; + printer.emit_to_string(&function_return_name); + assert(ident); + printer.print_cpp2( *ident, ident->position() ); + printer.print_cpp2( "_ret", ident->position() ); + printer.emit_to_string(); + printer.print_cpp2( function_return_name, ident->position() ); + } + } + + + //----------------------------------------------------------------------- + // + auto is_name_declared_in_current_type_scope(std::string_view s) + -> bool + { + if (!s.empty()) + { + // Navigate to the enclosing type, if there is one... + for (auto parent = current_declarations.rbegin(); + parent != current_declarations.rend(); + ++parent + ) + { + if ( + *parent + && (*parent)->is_namespace() + ) + { + break; + } + // ... and here it is, so... + if ( + *parent + && (*parent)->is_type() + ) + { + // ... for each of its type scope decls... + for (auto const& decl : (*parent)->get_type_scope_declarations()) + { + // ... 
check the name + if (decl->has_name(s)) + { + return true; + } + } + break; + } + } + } + return false; + } + + + //----------------------------------------------------------------------- + // + auto get_enclosing_type_name() + -> token const* + { + // Navigate to the enclosing type, if there is one... + for (auto parent = current_declarations.rbegin(); + parent != current_declarations.rend(); + ++parent + ) + { + if ( + *parent + && (*parent)->is_namespace() + ) + { + break; + } + // ... and here it is, so... + if ( + *parent + && (*parent)->is_type() + ) + { + return (*parent)->name(); + } + } + return {}; + } + + + //----------------------------------------------------------------------- + // + auto get_enclosing_function_name() + -> token const* + { + // Navigate to the enclosing function, if there is one... + for (auto parent = current_declarations.rbegin(); + parent != current_declarations.rend(); + ++parent + ) + { + if ( + *parent + && (*parent)->is_namespace() + ) + { + break; + } + // ... and here it is, so... 
+ if ( + *parent + && (*parent)->is_function() + ) + { + return (*parent)->name(); + } + } + return {}; + } + + + //----------------------------------------------------------------------- + // Helper to emit type-qualified names for member functions + // + auto type_qualification_if_any_for( + declaration_node const& n + ) + -> std::string + { + auto ret = std::string{}; + + if ( + printer.get_phase() == printer.phase2_func_defs + && n.parent_is_type() +// && !n.name()->as_string_view().starts_with("operator") + ) + { + // If this function is inside templated type(s), + // emit those outer template parameter lists too + auto parent = n.parent_declaration; + while ( + parent + && parent->is_type() + ) + { + auto list = std::string{""}; + if (parent->template_parameters) { + auto separator = std::string{"<"}; + for (auto& tparam : parent->template_parameters->parameters) { + assert (tparam->has_name()); + list += separator + tparam->name()->to_string(); + separator = ","; + } + list += ">"; + } + ret = print_to_string(*parent->identifier) + list + "::" + ret; + parent = parent->parent_declaration; + } + } + + return ret; + } + + //----------------------------------------------------------------------- + // Constructors and assignment operators + // + auto emit_special_member_function( + declaration_node const& n, + std::string prefix + ) + -> void + { STACKINSTR + assert(n.is_function()); + auto& func = std::get<declaration_node::a_function>(n.type); + assert(func); + + auto is_assignment = + generating_assignment_from == &n + || (*func->parameters)[0]->pass == passing_style::inout; + + if ( + func->parameters->ssize() > 1 + && (*func->parameters)[1]->has_name("that") + ) + { + emitting_that_function = true; + if ( + (*func->parameters)[1]->pass == passing_style::move + || generating_move_from == &n + ) + { + emitting_move_that_function = true; + } + } + + // Do the 'out' param and member init work only in the definition phase + if (printer.get_phase() == 
    //-----------------------------------------------------------------------
    //  Constructors and assignment operators
    //
    //  Emits the signature of an 'operator=' declaration as either a Cpp1
    //  constructor (named after the parent type) or a Cpp1 assignment
    //  operator. In the function-definitions phase it also matches the
    //  leading 'member = value;' statements of the body against the parent
    //  type's objects, in declaration order, and queues the results as
    //  member-initializers (constructor) or prolog statements (assignment)
    //  on current_functions.back()
    //
    //  n       the function declaration to emit
    //  prefix  text printed before the emitted name
    //
    auto emit_special_member_function(
        declaration_node const& n,
        std::string prefix
    )
        -> void
    {   STACKINSTR
        assert(n.is_function());
        auto& func = std::get<declaration_node::a_function>(n.type);
        assert(func);

        //  This is an assignment if we're generating one from 'n', or if the
        //  first parameter is 'inout'
        //  NOTE(review): indexes parameter 0 without a size check -- assumes a
        //  special member function always has a first ('this') parameter; confirm
        auto is_assignment =
            generating_assignment_from == &n
            || (*func->parameters)[0]->pass == passing_style::inout;

        //  A second parameter named 'that' makes this a copy/move function
        if (
            func->parameters->ssize() > 1
            && (*func->parameters)[1]->has_name("that")
            )
        {
            emitting_that_function = true;
            if (
                (*func->parameters)[1]->pass == passing_style::move
                || generating_move_from == &n
                )
            {
                emitting_move_that_function = true;
            }
        }

        //  Do the 'out' param and member init work only in the definition phase
        if (printer.get_phase() == printer.phase2_func_defs)
        {
            //  An object named 'this' is referred to by its object type's
            //  name instead
            auto canonize_object_name = [&]( declaration_node const* obj )
                -> std::string
            {
                assert(obj->has_name());
                auto ret = obj->name()->to_string();
                if (ret == "this") {
                    ret = print_to_string( *obj->get_object_type() );
                }
                return ret;
            };

            //  We'll use this common guidance in several errors,
            //  so write it once to keep the guidance consistent
            assert (n.parent_declaration && n.parent_declaration->name());
            auto error_msg = "an operator= body must start with a series of 'member = value;' initialization statements for each of the type-scope objects in the same order they are declared, or the member must have a default initializer (in type '" + n.parent_declaration->name()->to_string() + "')";

            //  If this constructor's type has data members, handle their initialization
            //      - objects is the list of this type's declarations
            //      - statements is the list of this constructor's statements
            //      - out_inits holds 'out' parameter initializations seen so far
            //        that have not yet been flushed into the output
            auto objects    = n.parent_declaration->get_type_scope_declarations(n.objects);
            auto statements = n.get_initializer_statements();
            auto out_inits  = std::vector<std::string>{};

            auto object     = objects.begin();
            auto statement  = statements.begin();
            auto separator  = std::string{": "};    // becomes ", " after the first mem-init

            while (object != objects.end())
            {
                auto object_name = canonize_object_name(*object);

                auto is_object_before_base =
                    n.get_decl_if_type_scope_object_name_before_a_base_type(*(*object)->name());

                auto found_explicit_init = false;
                auto found_default_init  = false;
                auto stmt_pos = n.position();

                auto initializer = std::string{};

                //  If we're at an assignment statement, get the lhs and rhs
                if (statement != statements.end())
                {
                    assert (*statement);
                    stmt_pos = (*statement)->position();
                    //  A negative line number falls back to the function's position
                    if (stmt_pos.lineno < 0) {
                        stmt_pos = n.position();
                    }

                    auto lhs = std::string{};
                    auto rhs = std::string{};
                    {
                        auto exprs = (*statement)->get_lhs_rhs_if_simple_assignment();
                        if (exprs.lhs) {
                            if (auto tok = exprs.lhs->get_first_token_ignoring_this()) {
                                lhs = *tok;
                            }
                            else {
                                lhs = print_to_string( *exprs.lhs );
                            }
                        }
                        if (exprs.rhs) {
                            rhs = print_to_string( *exprs.rhs );
                        }
                    }

                    //  If this is an initialization of an 'out' parameter, stash it
                    //  (and retry the same object against the next statement)
                    if (n.has_out_parameter_named(lhs)){
                        out_inits.push_back( print_to_string(**statement, false) );
                        (*statement)->emitted = true;
                        ++statement;
                        continue;
                    }

                    //  Now we're ready to check whether this is an assignment to *object

                    if (!lhs.empty())
                    {
                        //  First, see if it's an assignment 'name = something'
                        found_explicit_init = object_name == lhs;

                        //  Otherwise, see if it's 'this.name = something'
                        if (!found_explicit_init)
                        {
                            //  If it's of the form 'this.name', check 'name'
                            //  (8 is the length of the "(*this)." prefix)
                            if (
                                starts_with( lhs, "(*this).")
                                && object_name == lhs.substr(8)
                                )
                            {
                                found_explicit_init = true;
                            }
                        }

                        if (found_explicit_init)
                        {
                            initializer = rhs;

                            //  We've used this statement, so note it
                            //  and move 'statement' forward
                            (*statement)->emitted = true;
                            ++statement;
                        }
                    }
                }

                //  Otherwise, use a default... for a non-copy/move that's the member initializer
                //  (for which we don't need to emit anything special because it will get used),
                //  and for a copy/move function we default to "= that.same_member" (or, if this
                //  is a base type, to assigning from the lowered base subobject)
                if (!found_explicit_init)
                {
                    if (emitting_that_function && (*object)->has_name("this"))
                    {
                        auto pass = std::string{" const&"};
                        if (emitting_move_that_function) {
                            pass = "&&";
                        }
                        initializer =
                            "static_cast<"
                            + object_name
                            + pass
                            + ">(that)";
                        found_default_init = true;
                    }
                    else if (emitting_move_that_function)
                    {
                        initializer =
                            "std::move(that)."
                            + object_name;
                        found_default_init = true;
                    }
                    else if (emitting_that_function)
                    {
                        initializer =
                            "that."
                            + object_name;
                        found_default_init = true;
                    }
                    else if ((*object)->initializer)
                    {
                        initializer = print_to_string(*(*object)->initializer, false);
                        found_default_init = true;
                    }
                }

                //  If this is not an assignment to *object,
                //  and there was no member initializer, complain
                if (
                    !found_explicit_init
                    && !found_default_init
                    )
                {
                    errors.emplace_back(
                        stmt_pos,
                        "in operator=, expected '" + object_name + " = ...' initialization statement (because type scope object '" + object_name + "' does not have a default initializer)"
                    );
                    errors.emplace_back(
                        (*object)->position(),
                        "see declaration for '" + object_name + "' here"
                    );
                    errors.emplace_back(
                        stmt_pos,
                        error_msg
                    );
                    return;
                }

                assert(
                    found_explicit_init
                    || found_default_init
                );

                //  Emit the initializer if it isn't '_' (don't care) and ...
                if (initializer == "_") {
                    //  And on to the next data member...
                    ++object;
                    continue;
                }

                //  An empty initializer text is treated as value-initialization
                if (initializer.empty()) {
                    initializer = "{}";
                }

                //  (a) ... if this is assignment, emit it in all cases
                if (is_assignment)
                {
                    assert ((*object)->name());

                    //  Flush any 'out' parameter initializations
                    for (auto& init : out_inits) {
                        current_functions.back().prolog.statements.push_back(init + ";");
                    }
                    out_inits = {};

                    //  Then add this statement

                    //  Use ::operator= for base classes
                    if ((*object)->has_name("this")) {
                        current_functions.back().prolog.statements.push_back(
                            print_to_string( *(*object)->get_object_type() )
                            + "::operator= ( "
                            + initializer
                            + " );"
                        );
                    }
                    //  Else just use infix assignment
                    else {
                        current_functions.back().prolog.statements.push_back(
                            object_name
                            + " = "
                            + initializer
                            + ";"
                        );
                    }
                }
                //  (b) ... if this isn't assignment, only need to emit it if it was
                //          explicit, or is a base type or 'that' initializer
                //  NOTE(review): 'initializer' can't be empty here (an empty one
                //  was replaced by "{}" above), so the '!initializer.empty()'
                //  test below is always true
                else if (
                    found_explicit_init
                    || is_object_before_base
                    || (
                        (*object)->has_name("this")
                        && !initializer.empty()
                        )
                    || emitting_that_function
                    )
                {
                    //  An object declared before a base type is initialized
                    //  through its generated "<Type>_<name>_as_base" wrapper
                    if (is_object_before_base) {
                        assert (is_object_before_base->name());
                        object_name =
                            print_to_string( *is_object_before_base->parent_declaration->name() )
                            + "_"
                            + (*object)->name()->to_string()
                            + "_as_base";
                    }

                    //  Flush any 'out' parameter initializations
                    auto out_inits_with_commas = [&]() -> std::string {
                        auto ret = std::string{};
                        for (auto& init : out_inits) {
                            ret += init + ", ";
                        }
                        out_inits = {};
                        return ret;
                    }();

                    //  If there were any, wedge them into this initializer
                    //  using (holds nose) the comma operator and extra parens
                    //  as we add this statement
                    if (!out_inits_with_commas.empty()) {
                        current_functions.back().prolog.mem_inits.push_back(
                            separator
                            + object_name
                            + "{("
                            + out_inits_with_commas
                            + initializer
                            + " )}"
                        );
                    }
                    else {
                        //  "{}" inside the braces below would nest, so emit
                        //  empty braces instead
                        if (initializer == "{}") {
                            initializer = "";
                        }
                        current_functions.back().prolog.mem_inits.push_back(
                            separator
                            + object_name
                            + "{ "
                            + initializer
                            + " }"
                        );
                    }
                    separator = ", ";
                }

                //  And on to the next data member...
                ++object;
            }

            //  Now no data members should be left over
            //  NOTE(review): the loop above has no 'break' and only exits when
            //  object == objects.end() (or by returning), so this branch looks
            //  unreachable as written -- confirm before relying on it
            if (object != objects.end())
            {
                errors.emplace_back(
                    (*object)->position(),
                    canonize_object_name(*object) + " was not initialized - did you forget to write a default initializer, or assign to it in the operator= body?"
                );
                errors.emplace_back(
                    (*object)->position(),
                    "see declaration for '" + canonize_object_name(*object) + "' here"
                );
                errors.emplace_back(
                    (*object)->position(),
                    error_msg
                );
                return;
            }

            //  Flush any possible remaining 'out' parameters
            for (auto& init : out_inits) {
                current_functions.back().prolog.statements.push_back(init + ";");
            }
        }

        //  For a constructor, print the type name instead of the operator= function name
        assert(n.parent_is_type());
        if (!is_assignment)
        {
            printer.print_cpp2( prefix, n.position() );
            printer.print_cpp2( type_qualification_if_any_for(n), n.position() );
            printer.print_cpp2( print_to_string( *n.parent_declaration->name() ), n.position() );
            emit( *func, n.name(), false, true );
        }
        //  For an assignment operator, similar to emitting an ordinary function
        //  (plus a queued 'return *this;' epilog)
        else
        {
            assert (!current_functions.empty());
            current_functions.back().epilog.push_back( "return *this;");
            printer.print_cpp2( prefix, n.position() );
            printer.print_cpp2( "auto " + type_qualification_if_any_for(n) + print_to_string( *n.name() ), n.position());
            emit( *func, n.name() );
        }
    }
+ ); + errors.emplace_back( + (*object)->position(), + "see declaration for '" + canonize_object_name(*object) + "' here" + ); + errors.emplace_back( + (*object)->position(), + error_msg + ); + return; + } + + // Flush any possible remaining 'out' parameters + for (auto& init : out_inits) { + current_functions.back().prolog.statements.push_back(init + ";"); + } + } + + // For a constructor, print the type name instead of the operator= function name + assert(n.parent_is_type()); + if (!is_assignment) + { + printer.print_cpp2( prefix, n.position() ); + printer.print_cpp2( type_qualification_if_any_for(n), n.position() ); + printer.print_cpp2( print_to_string( *n.parent_declaration->name() ), n.position() ); + emit( *func, n.name(), false, true ); + } + // For an assignment operator, similar to emitting an ordinary function + else + { + assert (!current_functions.empty()); + current_functions.back().epilog.push_back( "return *this;"); + printer.print_cpp2( prefix, n.position() ); + printer.print_cpp2( "auto " + type_qualification_if_any_for(n) + print_to_string( *n.name() ), n.position()); + emit( *func, n.name() ); + } + } + + + //----------------------------------------------------------------------- + // + auto emit( + declaration_node const& n, + std::string const& capture_intro = {} + ) + -> void + { STACKINSTR + // Helper for declarations with parent *template-head*s. 
+ auto const emit_parent_template_parameters = [&]() { + auto parent_template_parameters = std::string{}; + auto parent = n.parent_declaration; + while ( + parent + && parent->is_type() + ) + { + if (parent->requires_clause_expression) { + parent_template_parameters = + "requires( " + print_to_string(*parent->requires_clause_expression) + " )\n" + + parent_template_parameters; + } + if (parent->template_parameters) { + parent_template_parameters = + "template " + print_to_string( *parent->template_parameters, false, true ) + + " " + parent_template_parameters; + } + parent = parent->parent_declaration; + } + printer.print_cpp2(parent_template_parameters, n.position()); + }; + + // Helper for declarations that can have requires-clauses + auto const emit_requires_clause = [&]() { + if ( + n.requires_clause_expression + || !function_requires_conditions.empty() + ) + { + printer.print_extra("\n"); + printer.ignore_alignment( true, n.position().colno + 4 ); + if (printer.get_phase() == printer.phase1_type_defs_func_decls) { + // Workaround GCC 10 not supporting requires in forward declarations in some cases. + // See commit 5a0d77f8e297902c0b9712c5aafb6208cfa4c139. 
+ if (n.is_object() || n.parent_is_type()) { + printer.print_extra("CPP2_REQUIRES_ ("); + } + else { + printer.print_extra("CPP2_REQUIRES ("); + } + } + else { + printer.print_extra("requires ("); + } + + if (n.requires_clause_expression) { + emit(*n.requires_clause_expression); + if (!function_requires_conditions.empty()) { + printer.print_extra(" && "); + } + } + + if (!function_requires_conditions.empty()) { + printer.print_extra(function_requires_conditions.front()); + for (auto it = std::cbegin(function_requires_conditions)+1; it != std::cend(function_requires_conditions); ++it) { + printer.print_extra(" && " + *it); + } + } + + printer.print_extra(") "); + function_requires_conditions = {}; + printer.ignore_alignment( false ); + } + }; + + + // Declarations are handled in multiple passes, + // but we only want to do the sema checks once + if ( + printer.get_phase() == printer.phase2_func_defs + && !sema.check(n) + ) + { + return; + } + + // In phase 0, only need to consider namespaces and types + + if ( + printer.get_phase() == printer.phase0_type_decls + && !n.is_namespace() + && !n.is_type() + ) + { + return; + } + + // If this is a generated declaration (negative source line number), + // add a line break before + if ( + printer.get_phase() == printer.phase2_func_defs + && n.position().lineno < 1 + ) + { + printer.print_extra("\n"); + } + + auto guard0 = stack_value(having_signature_emitted, &n); + auto guard1 = stack_element(current_declarations, &n); + current_names.push_back(&n); + auto guard2 = stack_size_if(current_names, n.is_function()); + + // Handle aliases + + if (n.is_alias()) + { + auto& a = std::get<declaration_node::an_alias>(n.type); + assert(a); + + // Namespace-scope aliases are emitted in phase 1, + // type-scope object aliases in both phases 1 and 2, and + // function-scope aliases in phase 2 + if ( + ( + !n.parent_is_function() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + || + ( + n.parent_is_type() + && 
n.is_object_alias() + && printer.get_phase() == printer.phase2_func_defs + ) + || + ( + n.parent_is_function() + && printer.get_phase() == printer.phase2_func_defs + ) + ) + { + assert( + a->is_type_alias() + || a->is_namespace_alias() + || a->is_object_alias() + ); + + // If we're in a type scope, handle the access specifier + if ( + n.parent_is_type() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + { + if (!n.is_default_access()) { + printer.print_cpp2(to_string(n.access) + ": ", n.position()); + } + else { + printer.print_cpp2("public: ", n.position()); + } + } + + // Emit template parameters if any + if ( + a->is_object_alias() + && n.parent_is_type() + && printer.get_phase() == printer.phase2_func_defs + ) + { + emit_parent_template_parameters(); + } + + if (n.template_parameters) { + printer.print_cpp2("template", n.position()); + emit(*n.template_parameters, false, true); + printer.print_cpp2(" ", n.position()); + } + + // Emit requires clause if any + emit_requires_clause(); + + // Handle type aliases + if (a->is_type_alias()) { + printer.print_cpp2( + "using " + + print_to_string(*n.identifier) + + " = " + + print_to_string( *std::get<alias_node::a_type>(a->initializer) ) + + ";\n", + n.position() + ); + } + + // Handle namespace aliases + else if (a->is_namespace_alias()) { + printer.print_cpp2( + "namespace " + + print_to_string(*n.identifier) + + " = " + + print_to_string( *std::get<alias_node::a_namespace>(a->initializer) ) + + ";\n", + n.position() + ); + } + + // Handle object aliases: + // - at function scope, it's const& + // - at namespace scope, it's inline constexpr + // - at type scope, it's also inline constexpr but see note (*) below + else if (a->is_object_alias()) + { + auto type = std::string{"auto"}; + if (a->type_id) { + type = print_to_string(*a->type_id); + } + + // (*) If this is at type scope, Cpp1 requires an out-of-line declaration dance + // for some cases to work - see 
https://stackoverflow.com/questions/11928089/ + if (n.parent_is_type()) + { + assert (n.parent_declaration->name()); + + if (printer.get_phase() == printer.phase1_type_defs_func_decls) { + printer.print_cpp2( + "static const " + + type + " " + + print_to_string(*n.identifier) + + ";\n", + n.position() + ); + } + else if (printer.get_phase() == printer.phase2_func_defs) { + // The following logic is not yet complete, so give a diagnostic for now + if (n.parent_declaration->parent_is_type()) { + errors.emplace_back( + n.position(), + "(temporary alpha limitation) an object alias cannot yet appear inside a nested type" + ); + return; + } + + printer.print_cpp2( + "inline CPP2_CONSTEXPR " + + type + + " " + + type_qualification_if_any_for(n) + + print_to_string(*n.identifier) + + " = " + + print_to_string( *std::get<alias_node::an_object>(a->initializer) ) + + ";\n", + n.position() + ); + } + } + // Otherwise, at function and namespace scope we can just define + else + { + auto intro = std::string{}; + if (n.parent_is_function()) { + intro = "constexpr"; + } + else if (n.parent_is_namespace()) { + intro = "inline constexpr"; + } + + printer.print_cpp2( + type + " " + + intro + " " + + print_to_string(*n.identifier) + + " = " + + print_to_string( *std::get<alias_node::an_object>(a->initializer) ) + + ";\n", + n.position() + ); + } + } + + else { + assert(!"ICE: should be unreachable - invalid alias"); + } + + return; + } + } + + + // Handle other declarations + + auto need_to_generate_assignment = false; + auto need_to_generate_move = false; + auto need_to_generate_postfix_inc_dec = false; + + if ( + n.is_function() + && n.has_name() + ) + { // reset the 'that' flags + emitting_that_function = false; + emitting_move_that_function = false; + already_moved_that_members = {}; + } + + auto is_main = + !n.parent_declaration + && n.has_name("main") + ; + auto is_in_type = n.parent_is_type(); + + if (!check_shadowing_of_type_scope_names(n)) { + return; + } + + + // If this is 
a function that has multiple return values, + // first we need to emit the struct that contains the returns + if ( + printer.get_phase() == printer.phase1_type_defs_func_decls + && n.is_function() + ) + { + auto& func = std::get<declaration_node::a_function>(n.type); + assert(func); + + if (func->returns.index() == function_type_node::list) + { + auto& r = std::get<function_type_node::list>(func->returns); + assert(r); + assert(std::ssize(r->parameters) > 0); + + auto func_name = n.name()->to_string(); + + // If it's a single named value, emit it as an anonymous return value + if (std::ssize(r->parameters) == 1) + { + printer.print_extra( + "\nusing " + + func_name + "_ret = " + + r->parameters[0]->declaration->get_object_type()->to_string() + + ";" + ); + } + // Else just emit it as an ordinary struct + else + { + printer.print_extra( + "\nstruct " + + n.name()->to_string() + + "_ret " + ); + emit(*r, true); + } + printer.print_extra( "\n" ); + } + } + + // If this is a class definition that has data members before bases, + // first we need to emit the aggregate that contains the members + if ( + n.is_type() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + { + assert( + n.initializer + && n.initializer->is_compound() + ); + auto& compound_stmt = std::get<statement_node::compound>(n.initializer->statement); + + assert(compound_stmt); + auto found = false; + + for (auto& stmt : compound_stmt->statements) + { + if (stmt->is_declaration()) + { + auto& decl = std::get<statement_node::declaration>(stmt->statement); + assert(decl); + assert(decl->name()); + + auto emit_as_base = + decl->get_decl_if_type_scope_object_name_before_a_base_type(*decl->name()); + + if (emit_as_base) { + printer.print_extra( + "\nstruct " + + print_to_string(*decl->parent_declaration->name()) + + "_" + + decl->name()->to_string() + + "_as_base { " + + print_to_string( *decl->get_object_type() ) + + " " + + decl->name()->to_string() + + "; };" + ); + found = true; + } + } + 
} + + if (found) { + printer.print_extra("\n"); + } + } + + // In class definitions, emit the explicit access specifier if there + // is one, or default to private for data and public for functions + if (printer.get_phase() == printer.phase1_type_defs_func_decls) + { + if (!n.is_default_access()) { + assert (is_in_type); + printer.print_cpp2(to_string(n.access) + ": ", n.position()); + } + else if (is_in_type) { + if (n.is_object()) { + printer.print_cpp2("private: ", n.position()); + } + else { + printer.print_cpp2("public: ", n.position()); + } + } + } + + // If this is a function definition and the function is inside + // type(s) that have template parameters and/or requires clauses, + // emit those outer template parameters and requires clauses too + if ( + printer.get_phase() == printer.phase2_func_defs + && n.is_function() + && n.initializer // only if the function has a definition (is not abstract) + ) + { + emit_parent_template_parameters(); + } + + // Now, emit our own template parameters + if ( + n.template_parameters + && ( + printer.get_phase() < printer.phase2_func_defs + || n.is_object() + || ( + n.is_function() + && n.has_name() // only if it is not unnamed function aka lambda + && n.initializer // only if the function has a definition (is not abstract) + && printer.get_phase() == printer.phase2_func_defs + ) + ) + && ( + !n.is_concept() + || printer.get_phase() == printer.phase1_type_defs_func_decls + ) + ) + { + printer.print_cpp2("template", n.position()); + emit(*n.template_parameters, false, true); + printer.print_cpp2(" ", n.position()); + } + + // User-defined type + if (n.is_type()) + { + assert( + n.initializer + && n.initializer->is_compound() + ); + auto& compound_stmt = std::get<statement_node::compound>(n.initializer->statement); + + if (printer.get_phase() != printer.phase2_func_defs) + { + if (n.requires_clause_expression) { + printer.print_cpp2("requires( ", n.requires_pos); + emit(*n.requires_clause_expression); + 
printer.print_cpp2(" )\n", n.requires_pos); + } + + printer.print_cpp2("class ", n.position()); + emit(*n.identifier); + + // Type declaration + if (printer.get_phase() == printer.phase0_type_decls) { + printer.print_cpp2( ";\n", n.position() ); + return; + } + } + + if ( + n.is_type_final() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + { + printer.print_cpp2( " final", n.position() ); + } + + // Type definition + auto separator = std::string{":"}; + auto started_body = false; + auto saved_for_body = std::vector<std::pair<std::string, source_position>>{}; + auto found_constructor = false; + auto found_that_constructor = false; + assert(compound_stmt); + + auto start_body = [&]{ + if (!started_body) { + printer.print_cpp2(" {", compound_stmt->position()); + started_body = true; + for (auto& [line, pos] : saved_for_body) { + printer.print_cpp2(line + "\n", pos); + } + } + }; + + for (auto& stmt : compound_stmt->statements) + { + assert(stmt); + if ( + !stmt->is_declaration() + && !stmt->is_using() + ) + { + // We will already have emitted an error for this in sema.check + return; + } + + // If it's a using statement, save it up if we haven't started the body yet + + if (stmt->is_using()) { + auto& use = std::get<statement_node::using_>(stmt->statement); + assert(use); + if (started_body) { + emit(*use); + } + else { + saved_for_body.emplace_back( print_to_string(*use), use->position() ); + } + continue; + } + + // Else it's a declaration... 
+ + auto& decl = std::get<statement_node::declaration>(stmt->statement); + assert(decl); + + if ( + decl->is_alias() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + { + if (started_body) { + emit(*decl); + } + else { + saved_for_body.emplace_back( print_to_string(*decl), decl->position() ); + } + continue; + } + + if (decl->is_constructor()) { + found_constructor = true; + } + if (decl->is_constructor_with_that()) { + found_that_constructor = true; + } + + // First we'll encounter the base types == subobjects named "this" + // and any data members declared before them that we push into private bases + assert(decl->name()); + auto emit_as_base = + decl->get_decl_if_type_scope_object_name_before_a_base_type(*decl->name()) + || decl->has_name("this") + ; + if (emit_as_base) + { + // Do the sema check for these declarations here, because we're + // handling them here instead of going through emit() for them + if (!sema.check(*decl)) { + return; + } + + if (decl->has_name("this")) { + if (printer.get_phase() == printer.phase1_type_defs_func_decls) { + printer.print_cpp2( + separator + " public " + print_to_string(*decl->get_object_type()), + compound_stmt->position() + ); + separator = ","; + } + } + else + { + if (printer.get_phase() == printer.phase1_type_defs_func_decls) { + printer.print_cpp2( + separator + + " public " + + print_to_string(*decl->parent_declaration->name()) + + "_" + + decl->name()->to_string() + + "_as_base", + compound_stmt->position() + ); + separator = ","; + } + } + } + // Then we'll switch to start the body == other members + else + { + if (printer.get_phase() == printer.phase1_type_defs_func_decls) { + start_body(); + } + emit(*decl); + } + } + + if (printer.get_phase() == printer.phase1_type_defs_func_decls) + { + // Ensure we emit the { even if there are only bases in the type + start_body(); + + auto id = print_to_string(*n.identifier); + auto indent = static_cast<size_t>( + std::clamp( + compound_stmt->body_indent, + 
n.position().colno, + n.position().colno + 5 // sanity check + ) + ); + auto prefix = "\n" + std::string( indent, ' ' ) + "public: "; + + if (n.member_function_generation) + { + // If no constructor was defined, there should only be + // a default constructor, so generate that + if (!found_constructor) { + printer.print_extra( prefix + id + "() = default;" ); + } + + // If no 'that' constructor was defined, disable copy/move + // so that Cpp1 doesn't silently generate it anyway + if (!found_that_constructor) { + printer.print_extra( prefix + id + "(" + id + " const&) = delete; /* No 'that' constructor, suppress copy */" ); + printer.print_extra( prefix + "auto operator=(" + id + " const&) -> void = delete;" ); + } + + if (!found_constructor || !found_that_constructor) { + printer.print_extra( "\n" ); + } + } + + printer.print_cpp2("};\n", compound_stmt->close_brace); + } + } + + + // Namespace + if (n.is_namespace()) + { + printer.print_cpp2("namespace ", n.position()); + + // "_" is the anonymous namespace, which is just whitespace in Cpp1 + if (auto tok = n.identifier->get_token(); + tok + && *tok != "_" + ) + { + emit(*n.identifier); + } + + assert( + n.initializer + && n.initializer->is_compound() + ); + auto& compound_stmt = std::get<statement_node::compound>(n.initializer->statement); + + printer.print_cpp2(" {", compound_stmt->position()); + + assert(compound_stmt); + for (auto& stmt : compound_stmt->statements) { + assert(stmt); + if (stmt->is_declaration()) { + auto& decl = std::get<statement_node::declaration>(stmt->statement); + assert(decl); + emit(*decl); + } + else if (stmt->is_using()) { + auto& use = std::get<statement_node::using_>(stmt->statement); + assert(use); + emit(*use); + } + else { + errors.emplace_back( + stmt->position(), + "a namespace scope must contain only declarations or 'using' statements, not other code" + ); + return; + } + } + + printer.print_cpp2("}\n", compound_stmt->close_brace); + } + + // Function + else if ( + 
n.is_function() + && ( + printer.get_phase() < printer.phase2_func_defs + || n.initializer // only emit definition if the function has one (is not abstract) + || n.is_defaultable_function() + ) + ) + { + auto is_streaming_operator = [](std::string_view sv) { + return + sv == "operator<<" + || sv == "operator>>" + ; + }; + + auto is_binary_arithmetic_operator = [](std::string_view sv) { + return + sv == "operator+" + || sv == "operator-" + || sv == "operator*" + || sv == "operator/" + || sv == "operator%" + ; + }; + + auto emit_as_friend = + n.name() + && ( + is_streaming_operator( n.name()->as_string_view() ) + || (!n.is_function_with_this() && is_binary_arithmetic_operator( n.name()->as_string_view() )) + ) + ; + + // Start fresh (there may be one spurious leftover + // requires-condition created during the declarations pass) + function_requires_conditions = {}; + + auto& func = std::get<declaration_node::a_function>(n.type); + assert(func); + + current_functions.push( + &n, + func.get(), + n.find_parent_declared_value_set_functions() + ); + auto guard0 = finally([&]{ current_functions.pop(); }); + + auto guard1 = stack_size(current_names); + + // If this is at expression scope, we can't emit "[[nodiscard]] auto name" + // so print the provided intro instead, which will be a Cpp1 lambda-introducer + if (capture_intro != "") + { + assert (!n.identifier); + printer.print_cpp2(capture_intro, n.position()); + emit( *func, nullptr, is_main); + } + + // Else start introducing a normal function + else + { + assert (n.identifier); + + // Handle member functions + std::string prefix = {}; + std::string suffix1 = {}; + std::string suffix2 = {}; + + if (n.is_constexpr) { + prefix += "constexpr "; + } + + if ( + !n.has_initializer() + && n.is_defaultable_function() + ) + { + suffix2 += " = default"; + } + + // If there's a 'this' parameter, handle it here (the parameter emission will skip it) + // because Cpp1 syntax requires its information to be spread around the 
declaration syntax + assert (func->parameters); + if ( + !func->parameters->parameters.empty() + && func->parameters->parameters[0]->declaration->has_name("this") + ) + { + assert (is_in_type); + auto& this_ = func->parameters->parameters[0]; + + switch (this_->pass) { + break;case passing_style::in: + suffix1 += " const"; + // Cpp1 ref-qualifiers don't belong on virtual functions + if (!this_->is_polymorphic()) { + suffix1 += "&"; + } + break;case passing_style::inout: + // Cpp1 ref-qualifiers don't belong on virtual functions + if (!this_->is_polymorphic()) { + suffix1 += " &"; + } + break;case passing_style::out: + ; // constructor is handled below + break;case passing_style::move: + suffix1 += " &&"; + + // We shouldn't be able to get into a state where these values + // exist here, if we did it's our compiler bug + break;case passing_style::copy: + case passing_style::forward: + default: + errors.emplace_back( n.position(), "ICE: invalid parameter passing style, should have been rejected", true); + } + + // Note: Include a phase check because Cpp1 does not allow + // these on out-of-line definitions + if (printer.get_phase() != printer.phase2_func_defs) + { + switch (this_->mod) { + break;case parameter_declaration_node::modifier::implicit: + ; + break;case parameter_declaration_node::modifier::virtual_: + prefix += "virtual "; + if (!n.initializer) { + suffix2 += " = 0"; + } + break;case parameter_declaration_node::modifier::override_: + suffix2 += " override"; + break;case parameter_declaration_node::modifier::final_: + suffix2 += " final"; + break;default: + if ( + func->is_constructor() + && !func->is_constructor_with_that() + && generating_assignment_from != &n + ) + { + prefix += "explicit "; + } + } + } + } + // Else if there isn't a 'this' parameter, but this function is in a type scope, + // it's a Cpp1 non-member function so we need to say so (on the declaration only) + else if ( + is_in_type + && printer.get_phase() != printer.phase2_func_defs + ) { 
+ if (emit_as_friend) { + prefix += "friend "; + } + else { + prefix += "static "; + } + } + + // If there's a return type, it's [[nodiscard]] implicitly and all the time + // -- for now there's no opt-out, wait and see whether we actually need one + if ( + func->has_non_void_return_type() + && !func->is_assignment() + && !func->is_compound_assignment() + && !func->is_increment_or_decrement() + && ( + printer.get_phase() == printer.phase1_type_defs_func_decls + || n.has_initializer() // so we're printing it in phase 2 + ) + && ( + !emit_as_friend // can't have an attribute on a friend declaration-not-definition + || printer.get_phase() != printer.phase1_type_defs_func_decls + ) + && !( + n.name() + && is_streaming_operator(n.name()->as_string_view()) + ) + ) + { + printer.print_cpp2( "[[nodiscard]] ", n.position() ); + } + + // Now we have all the pieces we need for the Cpp1 function declaration + + // For a special member function, we need to do more work to translate + // in-body initialization statements to the Cpp1 mem-init-list syntax + if ( + n.is_constructor() + || n.is_assignment() + ) + { + assert( + !is_main + && suffix2.empty() + && "ICE: an operator= shouldn't have been able to generate a suffix (or be main)" + ); + + emit_special_member_function( + n, + prefix + ); + + // If there's no inheritance and this operator= has two parameters, + // it's setting from a single value -- either from the same type + // (aka copy/move) or another type (a conversion) -- so recurse to + // emit related functions if the user didn't write them by hand + if ( + !n.parent_is_polymorphic() + && func->parameters->ssize() == 2 + && generating_assignment_from != &n + ) + { + assert(!current_functions.empty()); + + // A) Generate (A)ssignment from a constructor, + // if the user didn't write the assignment function themselves + if ( + // A1) This is '(out this, that)' + // and no '(inout this, that)' was written by the user + ( + &n == 
current_functions.back().declared_value_set_functions.out_this_in_that + && !current_functions.back().declared_value_set_functions.inout_this_in_that + ) + || + // A2) This is '(out this, move that)' + // and no '(inout this, move that)' was written by the user + // (*) and no '(inout this, that)' was written by the user (*) + // + // (*) This third test is to tie-break M2 and A2 in favor of M2. Both M2 and A2 + // can generate a missing '(inout this, move that)', and if we have both + // options then we should prefer to use M2 (generate move assignment from + // copy assignment) rather than A2 (generate move assignment from move + // construction) as M2 is a better fit (move assignment is more like copy + // assignment than like move construction, because assignments are designed + // structurally to set the value of an existing 'this' object) + ( + &n == current_functions.back().declared_value_set_functions.out_this_move_that + && !current_functions.back().declared_value_set_functions.inout_this_move_that + && !current_functions.back().declared_value_set_functions.inout_this_in_that + ) + || + // A3) This is '(out this, something-other-than-that)' + ( + n.is_constructor() + && !n.is_constructor_with_that() + && !contains( current_functions.back().declared_value_set_functions.assignments_from, n.nth_parameter_type_name(2) ) + ) + ) + { + need_to_generate_assignment = true; + } + + if (generating_move_from != &n) { + + // M) Generate (M)ove from copy, + // if the user didn't write the move function themselves + if ( + // M1) This is '(out this, that)' + // and no '(out this, move that)' was written by the user + ( + &n == current_functions.back().declared_value_set_functions.out_this_in_that + && !current_functions.back().declared_value_set_functions.out_this_move_that + ) + || + // M2) This is '(inout this, that)' + // and no '(inout this, move that)' was written by the user + ( + &n == current_functions.back().declared_value_set_functions.inout_this_in_that + && 
!current_functions.back().declared_value_set_functions.inout_this_move_that + ) + ) + { + need_to_generate_move = true; + } + + } + } + } + + // For a destructor, we need to translate + else if (n.is_destructor()) + { + assert( + !is_main + // prefix can be "virtual" + // suffix1 will be " &&" though we'll ignore that + // suffix2 can be "= 0" + ); + + // Print the ~-prefixed type name instead of the operator= function name + assert( + n.parent_is_type() + && n.parent_declaration->name() + ); + printer.print_cpp2( + prefix + + type_qualification_if_any_for(n) + + "~" + print_to_string(*n.parent_declaration->name()), + n.position() + ); + emit( *func, n.name(), false, true); + printer.print_cpp2( suffix2, n.position() ); + } + + // Ordinary functions are easier, do all their declarations except + // don't emit abstract virtual functions in phase 2 + else if ( + n.initializer + || printer.get_phase() < printer.phase2_func_defs + ) + { + printer.print_cpp2( prefix, n.position() ); + printer.print_cpp2( "auto ", n.position() ); + if ( + !emit_as_friend + || printer.get_phase() != printer.phase2_func_defs + ) + { + printer.print_cpp2( type_qualification_if_any_for(n), n.position() ); + } + + emit( *n.name() ); + emit( *func, n.name(), is_main, false, suffix1, generating_postfix_inc_dec_from != nullptr ); + printer.print_cpp2( suffix2, n.position() ); + + // If this is ++ or --, also generate a Cpp1 postfix version of the operator + if (func->is_increment_or_decrement()) + { + if (generating_postfix_inc_dec_from) { + assert (generating_postfix_inc_dec_from == &n); + } + else { + need_to_generate_postfix_inc_dec = true; + } + } + } + } + + // If we're only emitting declarations, end the function declaration + if ( + printer.get_phase() == printer.phase1_type_defs_func_decls + && !n.is_function_expression() + ) + { + emit_requires_clause(); + if (n.position().lineno < 0) { + printer.print_cpp2( ";\n", n.position() ); + } + else { + printer.print_cpp2( ";", n.position() ); 
+ } + + // Note: Not just early "return;" here because we may need + // to recurse to emit generated operator declarations too, + // so all the definition work goes into a big 'else' branch + } + + // Else emit the definition + else if (n.initializer) + { + if (func->returns.index() == function_type_node::list) { + auto& r = std::get<function_type_node::list>(func->returns); + function_returns.emplace_back(r.get()); + } + else if (func->returns.index() == function_type_node::id) { + function_returns.emplace_back( + &single_anon, // use special value as a note + std::get<function_type_node::id>(func->returns).pass, + std::get<function_type_node::id>(func->returns).type->is_wildcard() + ); + } + else { + function_returns.emplace_back(nullptr); // no return type at all + } + + if (func->has_postconditions()) { + current_functions.back().prolog.statements.push_back("cpp2::finally_presuccess cpp2_finally_presuccess;"); + } + + if (func->returns.index() == function_type_node::list) + { + auto& r = std::get<function_type_node::list>(func->returns); + assert(r); + for (auto& param : r->parameters) + { + assert(param && param->declaration); + auto& decl = *param->declaration; + + assert(decl.is_object()); + auto& id_expr = std::get<declaration_node::an_object>(decl.type); + assert(id_expr); + + auto loc = std::string{}; + if (!decl.initializer) { + loc += (" cpp2::deferred_init<"); + } + + // For convenience, just capture the id-expression as a string + printer.emit_to_string(&loc); + emit(*id_expr); + printer.emit_to_string(); + + if (!decl.initializer) { + loc += (">"); + } + loc += " "; + loc += decl.name()->as_string_view(); + if (decl.initializer) + { + std::string init; + printer.emit_to_string(&init); + printer.print_cpp2 ( " {", decl.initializer->position() ); + if (!decl.initializer->is_expression()) { + errors.emplace_back( + decl.initializer->position(), + "return value initializer must be an expression" + ); + return; + } + auto& expr = 
std::get<statement_node::expression>(decl.initializer->statement); + assert(expr); + + emit(*expr, false); + printer.print_cpp2 ( "}", decl.initializer->position() ); + printer.emit_to_string(); + + loc += init; + } + loc += ";"; + current_functions.back().prolog.statements.push_back(loc); + } + } + + for (auto&& c : func->contracts) + { + auto print = std::string(); + printer.emit_to_string(&print); + auto guard = stack_value(having_signature_emitted, nullptr); + emit(*c); + printer.emit_to_string(); + current_functions.back().prolog.statements.push_back(print); + } + + printer.preempt_position_push( n.equal_sign ); + + emit_requires_clause(); + + having_signature_emitted = nullptr; + + // If this is ++ or --, also generate a Cpp1 postfix version of the operator + if (generating_postfix_inc_dec_from) + { + assert (generating_postfix_inc_dec_from == &n); + + auto param1 = std::string{"*this"}; + if ( + !n.parent_declaration + || !n.parent_declaration->is_type() + ) + { + param1 = n.first_parameter_name(); + } + + printer.print_cpp2( + " { auto ret = " + param1 + "; ++" + param1 + "; return ret; }", + n.position() + ); + } + // Else just emit the normal function body + else { + emit( + *n.initializer, + true, func->position(), func->returns.index() == function_type_node::empty, + current_functions.back().prolog, + current_functions.back().epilog + ); + } + + printer.preempt_position_pop(); + + function_returns.pop_back(); + } + + // Finally, do the potential recursions... 
+ + // If this was a constructor and we want also want to emit + // it as an assignment operator, do it via a recursive call + if (need_to_generate_assignment) + { + // Reset the 'emitted' flags + for (auto& statement : n.get_initializer_statements()) { + statement->emitted = false; + } + + // Then reposition and do the recursive call + printer.reset_line_to(n.position().lineno); + generating_assignment_from = &n; + emit( n, capture_intro ); + generating_assignment_from = {}; + } + + // If this was a constructor and we want also want to emit + // it as an assignment operator, do it via a recursive call + if (need_to_generate_move) + { + // Reset the 'emitted' flags + for (auto& statement : n.get_initializer_statements()) { + statement->emitted = false; + } + + // Then reposition and do the recursive call + printer.reset_line_to(n.position().lineno); + generating_move_from = &n; + emit( n, capture_intro ); + generating_move_from = {}; + } + + // If this is ++ or --, emit the Cpp1 postfix version via a recursive call + if (need_to_generate_postfix_inc_dec) + { + // Reset the 'emitted' flags + for (auto& statement : n.get_initializer_statements()) { + statement->emitted = false; + } + + // Then reposition and do the recursive call + printer.reset_line_to(n.position().lineno); + generating_postfix_inc_dec_from = &n; + emit( n, capture_intro ); + generating_postfix_inc_dec_from = {}; + } + } + + // Object with optional initializer + else if ( + n.is_object() + && ( + ( + n.parent_is_namespace() + && printer.get_phase() >= printer.phase1_type_defs_func_decls + ) + || + ( + n.parent_is_type() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + || + ( + n.parent_is_function() + && printer.get_phase() == printer.phase2_func_defs + ) + || + ( + n.is_inside_global_unnamed_function() + && printer.get_phase() == printer.phase1_type_defs_func_decls + ) + ) + ) + { + auto& type = std::get<declaration_node::an_object>(n.type); + if ( + printer.get_phase() == 
printer.phase2_func_defs + && type->is_concept() + ) + { + return; + } + + emit_requires_clause(); + + if ( + printer.get_phase() != printer.phase2_func_defs + && n.parent_is_namespace() + && !type->is_concept() + ) + { + printer.print_cpp2( "extern ", n.position() ); + } + + // Emit "auto" for deduced types (of course) + if (type->is_wildcard()) { + assert(n.initializer); + emit( *type, n.position() ); + } + // Otherwise, emit the type + else { + // If there isn't an initializer, use cpp2::deferred_init<T> + if (!n.initializer) { + if (n.parent_is_function()) { + printer.print_cpp2( "cpp2::deferred_init<", n.position() ); + } + else if (!n.parent_is_type()) { + errors.emplace_back( + n.position(), + "a namespace-scope object must have an initializer" + ); + return; + } + } + printer.preempt_position_push(n.position()); + emit( *type ); + printer.preempt_position_pop(); + // one pointer is enough for now, pointer-to-function fun can be later + if ( + !n.initializer + && n.parent_is_function() + ) + { + printer.print_cpp2( ">", n.position() ); + } + } + + printer.print_cpp2( " ", n.position()); + assert(n.identifier); + + // If this is anonymous object (named "_"), generate a unique name + if (n.has_name("_")) { + if (n.has_wildcard_type()) { + errors.emplace_back( + n.identifier->position(), + "an object can have an anonymous name or an anonymous type, but not both at the same type (rationale: if '_ := f();' were allowed to keep the returned object alive, that syntax would be dangerously close to '_ = f();' to discard the returned object, and such importantly opposite meanings deserve more than a one-character typo distance; and explicit discarding gets the nice syntax because it's likely more common)" + ); + return; + } + + printer.print_cpp2( + "auto_" + labelized_position(n.identifier->get_token()), + n.identifier->position() + ); + } + else { + emit(*n.identifier); + } + + if ( + n.parent_is_namespace() + && printer.get_phase() != printer.phase2_func_defs + && 
!type->is_concept() + ) + { + printer.print_cpp2( ";", n.position()); + return; + } + + // If there's an initializer, emit it + if (n.initializer) + { + printer.add_pad_in_this_line(-100); + if (type->is_concept()) { + printer.print_cpp2( " = ", n.position() ); + } else { + printer.print_cpp2( " {", n.position() ); + } + + push_need_expression_list_parens(false); + assert( n.initializer ); + emit( *n.initializer, false ); + pop_need_expression_list_parens(); + + if (!type->is_concept()) { + printer.print_cpp2( "}", n.position() ); + } + } + + printer.print_cpp2( "; ", n.position() ); + } + } + + + //----------------------------------------------------------------------- + // print_errors + // + auto print_errors() + -> void + { + if (!errors.empty()) { + // Delete the output file + printer.abandon(); + } + + error_entry const* prev = {}; + bool print_fallback_errors = true; // true until we find a non-fallback message + + for (auto&& error : errors) + { + // Only print fallback error messages if we + // haven't found a better (non-fallback) one yet + if (!error.fallback) { + print_fallback_errors = false; + } + if (error.fallback && !print_fallback_errors) { + continue; + } + + // Suppress adjacent duplicates (e.g., can arise when we + // reenter operator= to emit it as an assignment operator) + if ( + !prev + || error != *prev + ) + { + error.print(std::cerr, strip_path(sourcefile)); + } + prev = &error; + } + + if (violates_lifetime_safety) { + std::cerr << " ==> program violates lifetime safety guarantee - see previous errors\n"; + } + if (violates_bounds_safety) { + std::cerr << " ==> program violates bounds safety guarantee - see previous errors\n"; + } + if (violates_initialization_safety) { + std::cerr << " ==> program violates initialization safety guarantee - see previous errors\n"; + } + } + + auto had_no_errors() + -> bool + { + return errors.empty(); + } + + + //----------------------------------------------------------------------- + // debug_print + 
// + auto debug_print() + -> void + { + // Only create debug output files if we managed to load the source file. + // + if (source_loaded) + { + auto out_source = std::ofstream{ sourcefile+"-source" }; + source.debug_print( out_source ); + + auto out_tokens = std::ofstream{ sourcefile+"-tokens" }; + tokens.debug_print( out_tokens ); + + auto out_parse = std::ofstream{ sourcefile+"-parse" }; + parser.debug_print( out_parse ); + + auto out_symbols = std::ofstream{ sourcefile+"-symbols" }; + sema.debug_print ( out_symbols ); + } + } + + + //----------------------------------------------------------------------- + // has_cpp1: pass through + // + auto has_cpp1() const + -> bool + { + return source.has_cpp1(); + } + + + //----------------------------------------------------------------------- + // has_cpp2: pass through + // + auto has_cpp2() const + -> bool + { + return source.has_cpp2(); + } +}; + +} + + +#endif diff --git a/CompilerDriver/cc2/source/version.info b/CompilerDriver/cc2/source/version.info new file mode 100644 index 0000000..87fe59a --- /dev/null +++ b/CompilerDriver/cc2/source/version.info @@ -0,0 +1 @@ +"v0.3.0"
\ No newline at end of file |
