From b03f3d83efcbc012c4153da14eaf158bb50031d2 Mon Sep 17 00:00:00 2001 From: Amlal El Mahrouss Date: Wed, 3 Jan 2024 23:40:16 +0100 Subject: tools: incremental changes, support for a C compiler will soon be here. alongside the 32x0. Signed-off-by: Amlal El Mahrouss --- CompilerDriver/.gitignore | 1 + CompilerDriver/cc.cc | 1884 +++++++++++++++++++++++++++++++++++++++++++++ CompilerDriver/cc.cxx | 1883 -------------------------------------------- CompilerDriver/ccplus.cc | 640 +++++++++++++++ CompilerDriver/ccplus.cxx | 640 --------------- CompilerDriver/cpp.cc | 1024 ++++++++++++++++++++++++ CompilerDriver/cpp.cxx | 1024 ------------------------ CompilerDriver/ld.cc | 594 ++++++++++++++ CompilerDriver/ld.cxx | 587 -------------- CompilerDriver/makefile | 16 +- CompilerDriver/masm.cc | 957 +++++++++++++++++++++++ CompilerDriver/masm.cxx | 950 ----------------------- 12 files changed, 5109 insertions(+), 5091 deletions(-) create mode 100644 CompilerDriver/cc.cc delete mode 100644 CompilerDriver/cc.cxx create mode 100644 CompilerDriver/ccplus.cc delete mode 100644 CompilerDriver/ccplus.cxx create mode 100644 CompilerDriver/cpp.cc delete mode 100644 CompilerDriver/cpp.cxx create mode 100644 CompilerDriver/ld.cc delete mode 100644 CompilerDriver/ld.cxx create mode 100644 CompilerDriver/masm.cc delete mode 100644 CompilerDriver/masm.cxx (limited to 'CompilerDriver') diff --git a/CompilerDriver/.gitignore b/CompilerDriver/.gitignore index ce5ef93..23a095f 100644 --- a/CompilerDriver/.gitignore +++ b/CompilerDriver/.gitignore @@ -1,4 +1,5 @@ bin/ld +bin/mld bin/cpp bin/cc bin/masm diff --git a/CompilerDriver/cc.cc b/CompilerDriver/cc.cc new file mode 100644 index 0000000..899734b --- /dev/null +++ b/CompilerDriver/cc.cc @@ -0,0 +1,1884 @@ +/* + * ======================================================== + * + * cc + * Copyright Western Company, all rights reserved. 
+ *
+ * ========================================================
+ */
+
+// NOTE(review): the header names of the eight #include directives below are
+// missing in this copy of the patch (presumably stripped together with their
+// angle brackets) -- restore them from the upstream commit.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define kOk 0
+
+/* Optimized C driver */
+/* This is part of MP-UX C SDK. */
+/* (c) Western Company */
+
+/////////////////////
+
+// ANSI ESCAPE CODES
+
+/////////////////////
+
+#define kBlank "\e[0;30m"
+#define kRed "\e[0;31m"
+#define kWhite "\e[0;97m"
+
+/////////////////////////////////////
+
+// INTERNAL STUFF OF THE C COMPILER
+
+/////////////////////////////////////
+
+namespace detail
+{
+    // Pairs a name with the register string assigned to it.
+    struct CompilerRegisterMap
+    {
+        std::string fName;
+        std::string fRegister;
+    };
+
+    // State shared by the whole driver through the global kState.
+    // NOTE(review): the template arguments of the two vectors and of the
+    // unique_ptr are missing in this copy of the patch. Compile() pushes
+    // { .fName, .fRegister } entries into kStackFrame, so that one is
+    // presumably a vector of CompilerRegisterMap; the other two cannot be
+    // told from this file section -- confirm against the upstream commit.
+    struct CompilerState
+    {
+        std::vector fSyntaxTreeList;
+        std::vector kStackFrame;
+        ParserKit::SyntaxLeafList* fSyntaxTree{ nullptr };
+        std::unique_ptr fOutputAssembly;
+        std::string fLastFile;
+        std::string fLastError;
+        bool kVerbose;
+    };
+}
+
+static detail::CompilerState kState;
+// print_error() exits once kAcceptableErrors has grown past this limit.
+static SizeType kErrorLimit = 100;
+
+// Number of diagnostics reported so far by print_error().
+static Int32 kAcceptableErrors = 0;
+
+namespace detail
+{
+    // Prints one diagnostic on std::cout and counts it.
+    // @param reason message text; a single leading '\n' is dropped.
+    // @param file   file the message refers to; the first ".pp" found in the
+    //               name is removed, and the literal name "cc" is reported as
+    //               an internal compiler error.
+    // The file header line is only printed when `file` differs from the file
+    // of the previous diagnostic (kState.fLastFile).
+    // Terminates the process with exit status 3 when the running count
+    // already exceeds kErrorLimit; otherwise the count is incremented.
+    // NOTE(review): declared noexcept although the std::string concatenation
+    // and assignment below can throw, which would end in std::terminate.
+    void print_error(std::string reason, std::string file) noexcept
+    {
+        if (reason[0] == '\n')
+            reason.erase(0, 1);
+
+        if (file.find(".pp") != std::string::npos)
+        {
+            file.erase(file.find(".pp"), 3);
+        }
+
+        if (kState.fLastFile != file)
+        {
+            std::cout << kRed << "[ cc ] " << kWhite << ((file == "cc") ? "internal compiler error " : ("in file, " + file)) << kBlank << std::endl;
+            std::cout << kRed << "[ cc ] " << kWhite << reason << kBlank << std::endl;
+
+            kState.fLastFile = file;
+        }
+        else
+        {
+            std::cout << kRed << "[ cc ] [ " << kState.fLastFile << " ] " << kWhite << reason << kBlank << std::endl;
+        }
+
+        // the limit is tested before the increment.
+        if (kAcceptableErrors > kErrorLimit)
+            std::exit(3);
+
+        ++kAcceptableErrors;
+    }
+
+    // Name/value pair; used for the entries of kCompilerTypes and
+    // kCompilerVariables (see the fName/fValue accesses in Compile and Check).
+    struct CompilerType
+    {
+        std::string fName;
+        std::string fValue;
+    };
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+// Target architecture.
+static int kMachine = 0;
+
+/////////////////////////////////////////
+
+// REGISTERS ACCORDING TO USED ASSEMBLER
+
+/////////////////////////////////////////
+
+static size_t kRegisterCnt = kAsmRegisterLimit;
+// Value kRegisterCounter starts from; Compile() resets the counter to it on
+// every closing brace.
+static size_t kStartUsable = 6;
+static size_t kUsableLimit = 14;
+// Number appended to kAsmRegisterPrefix to name the register of the next new
+// variable (see the assignment handler in Compile()).
+static size_t kRegisterCounter = kStartUsable;
+static std::string kRegisterPrefix = kAsmRegisterPrefix;
+
+/////////////////////////////////////////
+
+// COMPILER PARSING UTILITIES/STATES.
+
+/////////////////////////////////////////
+
+// NOTE(review): the element type of this vector is missing in this copy of
+// the patch -- restore it from the upstream commit.
+static std::vector kFileList;
+static CxxKit::AssemblyFactory kFactory;
+// Set by Compile() when "struct", "union" or "enum" starts at the scanned
+// position and the text has no ';'; cleared on a closing brace.
+static bool kInStruct = false;
+// Set by the "while"/"for" handlers of Compile(); cleared on a closing brace
+// that leaves kInBraces set.
+static bool kOnWhileLoop = false;
+static bool kOnForLoop = false;
+// kInBraces is raised on '{' and dropped when kBracesCount falls below 1.
+static bool kInBraces = false;
+static size_t kBracesCount = 0UL;
+
+/* @brief C compiler backend for Optimized C */
+class CompilerBackendClang final : public ParserKit::CompilerBackend
+{
+public:
+    explicit CompilerBackendClang() = default;
+    ~CompilerBackendClang() override = default;
+
+    CXXKIT_COPY_DEFAULT(CompilerBackendClang);
+
+    // Validates one piece of source text; returns the accumulated error text,
+    // empty when nothing was reported.
+    std::string Check(const char* text, const char* file);
+    // Translates one piece of source text into syntax leaves.
+    bool Compile(const std::string& text, const char* file) override;
+
+    const char* Language() override { return "Optimized 64x0 C"; }
+
+};
+
+static CompilerBackendClang* kCompilerBackend = nullptr;
+// NOTE(review): the element types of the three vectors below are missing in
+// this copy of the patch. Going by their uses, kCompilerVariables and
+// kCompilerTypes presumably hold detail::CompilerType (fName/fValue are read)
+// and kCompilerFunctions presumably holds std::string (Compile() pushes the
+// statement text) -- confirm against the upstream commit.
+static std::vector kCompilerVariables;
+static std::vector kCompilerFunctions;
+static std::vector kCompilerTypes;
+
+// @brief this hook code before the begin/end command.
+// kAddIfAnyEnd is appended to the emitted leaf when Compile() closes the
+// outermost brace, and is cleared afterwards.
+static std::string kAddIfAnyBegin;
+static std::string kAddIfAnyEnd;
+// Characters of the statement most recently scanned by the assignment handler
+// of Compile().
+static std::string kLatestVar;
+
+// \brief parse a function call
+// \param _text statement text; only handled when it starts with '('.
+// \return for every ';' terminated statement: "\tpsh <arg>" for each comma
+// separated argument, followed by "\n\tjb __import ". An empty string when
+// _text does not start with '('.
+static std::string cc_parse_function_call(std::string& _text)
+{
+    if (_text.empty() || _text[0] != '(')
+        return "";
+
+    // Erases the first occurrence of ch, when there is one.
+    // std::string::erase throws std::out_of_range when it is handed npos, so
+    // the result of find() must be checked before erasing.
+    const auto erase_first = [](std::string& str, char ch) {
+        const auto pos = str.find(ch);
+
+        if (pos != std::string::npos)
+            str.erase(pos, 1);
+    };
+
+    std::string args_buffer;
+    std::string args;
+
+    for (char current : _text)
+    {
+        args_buffer += current;
+
+        if (current != ';')
+            continue;
+
+        const auto open_paren = args_buffer.find('(');
+
+        // a statement without '(' (a stray ';' for instance) holds no call.
+        if (open_paren == std::string::npos)
+        {
+            args_buffer.clear();
+            continue;
+        }
+
+        // drop whatever precedes the call, then the ';' and one pair of
+        // parentheses. the buffer is deliberately not cleared afterwards,
+        // the next statement's '(' search skips the left-over text.
+        args_buffer.erase(0, open_paren);
+        erase_first(args_buffer, ';');
+        erase_first(args_buffer, ')');
+        erase_first(args_buffer, '(');
+
+        if (!args_buffer.empty())
+            args += "\tpsh ";
+
+        // one push per argument.
+        for (auto comma = args_buffer.find(','); comma != std::string::npos;
+             comma = args_buffer.find(','))
+        {
+            args_buffer.replace(comma, 1, "\n\tpsh ");
+        }
+
+        args += args_buffer;
+        args += "\n\tjb __import ";
+    }
+
+    return args;
+}
+
+namespace detail
+{
+    // Gives access to a 64-bit value either as a whole (raw) or as eight
+    // chars (number).
+    union number_cast
+    {
+        number_cast(UInt64 raw)
+            : raw(raw)
+        {}
+
+        char number[8];
+        UInt64 raw;
+    };
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+// @name Compile
+// @brief Generate MASM from a C assignment.
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+// Translates one piece of C source text into assembly text.
+// The text is walked character by character; every handler below reacts to
+// one trigger character and pushes a syntax leaf carrying the generated text
+// onto kState.fSyntaxTree->fLeafList.
+// @param text source text to translate, copied into the mutable local _text.
+// @param file not referenced by this function.
+// @return always true.
+// NOTE(review): kState.fSyntaxTree is dereferenced without a null check.
+bool CompilerBackendClang::Compile(const std::string& text, const char* file)
+{
+    std::string _text = text;
+
+    auto syntax_tree = ParserKit::SyntaxLeafList::SyntaxLeaf();
+    bool type_found = false;
+    bool function_found = false;
+
+    // start parsing
+    for (size_t text_index = 0; text_index < _text.size(); ++text_index)
+    {
+        uuid_t out{0};
+
+        uuid_generate_random(out);
+        // time_off.raw is used as a suffix for the generated loop labels.
+        // NOTE(review): uuid_t is presumably libuuid's array type, in which
+        // case this cast converts the address of `out`, not the generated
+        // bytes -- confirm.
+        detail::number_cast time_off = (UInt64)out;
+
+        // type handler: looks for a known type name at the current position.
+        if (!type_found)
+        {
+            auto substr = _text.substr(text_index);
+            std::string match_type;
+
+            for (size_t y = 0; y < substr.size(); ++y)
+            {
+                if (substr[y] == ' ')
+                {
+                    // erase(pos) without a count removes everything from the
+                    // first blank up to the end of match_type.
+                    while (match_type.find(' ') != std::string::npos) {
+                        match_type.erase(match_type.find(' '));
+                    }
+
+                    for (auto& clType : kCompilerTypes)
+                    {
+                        if (clType.fName == match_type)
+                        {
+                            match_type.clear();
+
+                            std::string buf;
+
+                            buf += clType.fValue;
+                            buf += ' ';
+
+                            // struct/union: the word after the keyword is
+                            // appended as well.
+                            if (clType.fName == "struct" ||
+                                clType.fName == "union")
+                            {
+                                for (size_t a = y + 1; a < substr.size(); a++)
+                                {
+                                    if (substr[a] == ' ')
+                                    {
+                                        break;
+                                    }
+
+                                    if (substr[a] == '\n')
+                                        break;
+
+                                    buf += substr[a];
+                                }
+                            }
+
+                            // assignments are left to the '=' handler below.
+                            if (substr.find('=') != std::string::npos)
+                            {
+                                break;
+                            }
+
+                            if (_text.find('(') != std::string::npos)
+                            {
+                                syntax_tree.fUserValue = buf;
+
+                                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+                            }
+
+                            type_found = true;
+                            break;
+                        }
+                    }
+
+                    // only the first blank-delimited word is considered.
+                    break;
+                }
+
+                match_type += substr[y];
+            }
+        }
+
+        // opening brace: enters a block, and names it when it is a loop body.
+        if (_text[text_index] == '{')
+        {
+            if (kInStruct)
+            {
+                continue;
+            }
+
+            kInBraces = true;
+            ++kBracesCount;
+
+            if (kOnWhileLoop ||
+                kOnForLoop)
+            {
+                syntax_tree.fUserValue = "void __export .text _L";
+                syntax_tree.fUserValue += std::to_string(kBracesCount) + "_" + std::to_string(time_off.raw);
+            }
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+        }
+
+        // return keyword handler
+        // The text starting at text_index is compared with "return"; at the
+        // first differing character everything up to the next ';' is taken
+        // as the returned value.
+        if (_text[text_index] == 'r')
+        {
+            std::string return_keyword;
+            return_keyword += "return";
+
+            std::size_t index = 0UL;
+
+            std::string value;
+
+            for (size_t return_index = text_index; return_index < _text.size(); ++return_index)
+            {
+                if (_text[return_index] != return_keyword[index])
+                {
+                    for (size_t value_index = return_index; value_index < _text.size(); ++value_index)
+                    {
+                        if (_text[value_index] == ';')
+                            break;
+
+                        value += _text[value_index];
+                    }
+
+                    break;
+                }
+
+                ++index;
+            }
+
+            // the whole keyword matched.
+            if (index == return_keyword.size())
+            {
+                if (!value.empty())
+                {
+                    // erase(pos) drops the '(' and everything after it.
+                    if (value.find('(') != std::string::npos)
+                    {
+                        value.erase(value.find('('));
+                    }
+
+                    // NOTE(review): no '(' is left in value at this point, so
+                    // find() yields npos and npos + 2 wraps around to index 1.
+                    if (!isdigit(value[value.find('(') + 2]))
+                    {
+                        std::string tmp = value;
+                        bool reg_to_reg = false;
+
+                        value.clear();
+
+                        value += " __import";
+                        value += tmp;
+                    }
+
+                    syntax_tree.fUserValue = "\tldw r19, ";
+
+                    // make it pretty.
+                    if (value.find('\t') != std::string::npos)
+                        value.erase(value.find('\t'), 1);
+
+                    syntax_tree.fUserValue += value + "\n";
+                }
+
+                syntax_tree.fUserValue += "\tjlr";
+
+                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+                break;
+            }
+        }
+
+        // if handler: builds two register loads followed by a branch mnemonic
+        // picked from the comparison operator found in the text.
+        if (_text[text_index] == 'i' &&
+            _text[text_index + 1] == 'f')
+        {
+            std::string format = "ldw r15, %s\nldw r16, %s2\n";
+            std::string expr = format;
+
+            if (ParserKit::find_word(_text, "=="))
+            {
+                expr += "\nbeq";
+            }
+
+            if (ParserKit::find_word(_text, "!="))
+            {
+                expr += "\nbneq";
+            }
+
+            if (ParserKit::find_word(_text, ">="))
+            {
+                expr += "\nbge";
+            }
+            else if (ParserKit::find_word(_text, ">"))
+            {
+                expr += "\nbg";
+            }
+
+            if (ParserKit::find_word(_text, "<="))
+            {
+                expr += "\nble";
+            }
+            else if (ParserKit::find_word(_text, "<"))
+            {
+                expr += "\nbl";
+            }
+
+            std::string substr = expr;
+
+            std::string buf;
+
+            // copies the text after the first "if" into buf, skipping braces
+            // and turning comparison operators into ", ".
+            for (size_t text_index_2 = (_text.find("if") + std::string("if").size()); text_index_2 < _text.size(); ++text_index_2)
+            {
+                // a ';' discards what was collected so far and restarts the
+                // collection from the text after it.
+                if (_text[text_index_2] == ';')
+                {
+                    buf.clear();
+
+                    for (size_t text_index_3 = text_index_2 + 1; text_index_3 < _text.size(); text_index_3++)
+                    {
+                        if (_text[text_index_3] == '{')
+                            continue;
+
+                        if (_text[text_index_3] == '}')
+                            continue;
+
+                        if (_text[text_index_3] == ' ')
+                            continue;
+
+                        if (_text[text_index_3] == '=')
+                            continue;
+
+                        if (_text[text_index_3] == '<' &&
+                            _text[text_index_3+1] == '=' ||
+                            _text[text_index_3] == '=' &&
+                            _text[text_index_3+1] == '=' ||
+                            _text[text_index_3] == '>' &&
+                            _text[text_index_3+1] == '=' ||
+                            _text[text_index_3] == '>' ||
+                            _text[text_index_3] == '<' &&
+                            _text[text_index_3+1] == '=' ||
+                            _text[text_index_3] == '!')
+                        {
+                            buf += ", ";
+                            continue;
+                        }
+                        else if (_text[text_index_3] == '=')
+                        {
+                            continue;
+                        }
+
+                        buf += _text[text_index_3];
+                    }
+
+                    break;
+                }
+
+                if (_text[text_index_2] == '{')
+                    continue;
+
+                if (_text[text_index_2] == '}')
+                    continue;
+
+                if (_text[text_index_2] == '<' &&
+                    _text[text_index_2+1] == '=' ||
+                    _text[text_index_2] == '=' &&
+                    _text[text_index_2+1] == '=' ||
+                    _text[text_index_2] == '>' &&
+                    _text[text_index_2+1] == '=' ||
+                    _text[text_index_2] == '>' ||
+                    _text[text_index_2] == '<' &&
+                    _text[text_index_2+1] == '=' ||
+                    _text[text_index_2] == '!')
+                {
+                    buf += ", ";
+                    continue;
+                }
+                else if (_text[text_index_2] == '=')
+                {
+                    continue;
+                }
+
+                buf += _text[text_index_2];
+            }
+
+            // condition without a comparison operator.
+            if (buf.find(",") == std::string::npos &&
+                buf.find("(") != std::string::npos &&
+                buf.find(")") != std::string::npos )
+            {
+
+                // NOTE(review): erase(find(...)) throws std::out_of_range
+                // whenever the searched character is absent from cond/cond2.
+                std::string cond = buf.substr(buf.find("(") + 1, buf.find(")") - 1);
+                cond.erase(cond.find("("));
+
+                std::string cond2 = buf.substr(buf.find("(") + 1, buf.find(")") - 1);
+                cond2.erase(cond2.find(")"));
+
+                substr.replace(substr.find("%s"), 2, cond);
+                substr.replace(substr.find("%s2"), 3, cond2);
+
+                buf.replace(buf.find(cond), cond.size(), "r15");
+                buf.replace(buf.find(cond2), cond2.size(), "r16");
+
+                substr += buf;
+
+                syntax_tree.fUserValue = substr + "\n";
+
+                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+                break;
+            }
+            else
+            {
+                continue;
+            }
+
+            // NOTE(review): both branches of the if/else above leave this
+            // iteration (break / continue), so nothing from here to the end
+            // of the handler is ever executed.
+
+            // dealing with pointer
+            if (buf.find("*") != std::string::npos)
+            {
+                buf.erase(buf.find("*"), 1);
+            }
+
+            std::string cond = buf.substr(buf.find("(") + 1, buf.find(",") - 1);
+            cond.erase(cond.find(","));
+
+            std::string cond2 = buf.substr(buf.find(",") + 1, buf.find(")") - 1);
+            cond2.erase(cond2.find(")"));
+
+            substr.replace(substr.find("%s"), 2, cond);
+            substr.replace(substr.find("%s2"), 3, cond2);
+
+            buf.replace(buf.find(cond), cond.size(), "r15");
+            buf.replace(buf.find(cond2), cond2.size(), "r16");
+
+            substr += buf;
+
+            syntax_tree.fUserValue = substr + "\n";
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+            break;
+        }
+
+        // Parse expressions and instructions here.
+        // what does this mean?
+        // we encounter an assignment, or we reached the end of an expression.
+        if (_text[text_index] == '=' ||
+            _text[text_index] == ';')
+        {
+            if (function_found)
+                continue;
+
+            if (_text[text_index] == ';' &&
+                kInStruct)
+                continue;
+
+            if (_text.find("typedef ") != std::string::npos)
+                continue;
+
+            if (_text[text_index] == '=' &&
+                kInStruct)
+            {
+                continue;
+            }
+
+            // skips the '=' that belongs to ==, !=, <= and >=.
+            // NOTE(review): text_index - 1 wraps around when text_index is 0.
+            if (_text[text_index+1] == '=' ||
+                _text[text_index-1] == '!' ||
+                _text[text_index-1] == '<' ||
+                _text[text_index-1] == '>')
+            {
+                continue;
+            }
+
+            std::string substr;
+
+            // mnemonic: a load inside braces (lda when a '*' comes before the
+            // '='), an exported store outside of them.
+            if (_text.find('=') != std::string::npos &&
+                kInBraces)
+            {
+                if (_text.find("*") != std::string::npos)
+                {
+                    if (_text.find("=") > _text.find("*"))
+                        substr += "\tlda ";
+                    else
+                        substr += "\tldw ";
+                }
+                else
+                {
+                    substr += "\tldw ";
+                }
+            }
+            else if (_text.find('=') != std::string::npos &&
+                     !kInBraces)
+            {
+                substr += "stw __export .data ";
+            }
+
+            int first_encountered = 0;
+
+            std::string str_name;
+
+            // copies the statement into substr up to the first ';', dropping
+            // braces and blanks and turning '=' into ','.
+            for (size_t text_index_2 = 0; text_index_2 < _text.size(); ++text_index_2)
+            {
+                if (_text[text_index_2] == '\"')
+                {
+                    ++text_index_2;
+
+                    // want to add this, so that the parser recognizes that this is a string.
+                    substr += '"';
+
+                    for (; text_index_2 < _text.size(); ++text_index_2)
+                    {
+                        if (_text[text_index_2] == '\"')
+                            break;
+
+                        kLatestVar += _text[text_index_2];
+                        substr += _text[text_index_2];
+                    }
+                }
+
+                if (_text[text_index_2] == '{' ||
+                    _text[text_index_2] == '}')
+                    continue;
+
+                if (_text[text_index_2] == ';')
+                {
+                    break;
+                }
+
+                if (_text[text_index_2] == ' ' ||
+                    _text[text_index_2] == '\t')
+                {
+                    if (first_encountered != 2)
+                    {
+                        if (_text[text_index] != '=' &&
+                            substr.find("__export .data") == std::string::npos &&
+                            !kInStruct &&
+                            _text.find("struct") == std::string::npos &&
+                            _text.find("extern") == std::string::npos &&
+                            _text.find("union") == std::string::npos &&
+                            _text.find("typedef") == std::string::npos)
+                            substr += "__export .data ";
+                    }
+
+                    ++first_encountered;
+
+                    continue;
+                }
+
+                if (_text[text_index_2] == '=')
+                {
+                    // NOTE(review): replace() throws std::out_of_range when
+                    // "__export .data" is not present in substr.
+                    if (!kInBraces)
+                    {
+                        substr.replace(substr.find("__export .data"), strlen("__export .data"), "__export .page_zero ");
+                    }
+
+                    substr += ",";
+                    continue;
+                }
+
+                kLatestVar += _text[text_index_2];
+                substr += _text[text_index_2];
+            }
+
+            // removes known type names/values, unless they only occur after
+            // the first '"' of substr.
+            for (auto& clType : kCompilerTypes)
+            {
+                if (substr.find(clType.fName) != std::string::npos)
+                {
+                    if (substr.find(clType.fName) > substr.find('"'))
+                        continue;
+
+                    substr.erase(substr.find(clType.fName), clType.fName.size());
+                }
+                else if (substr.find(clType.fValue) != std::string::npos)
+                {
+                    if (substr.find(clType.fValue) > substr.find('"'))
+                        continue;
+
+                    if (clType.fName == "const")
+                        continue;
+
+                    substr.erase(substr.find(clType.fValue), clType.fValue.size());
+                }
+            }
+
+            if (substr.find("struct") != std::string::npos)
+            {
+                substr.replace(substr.find("struct"), strlen("struct"), "ldw ");
+                substr += ", 0";
+            }
+
+            if (substr.find("union") != std::string::npos)
+            {
+                substr.replace(substr.find("union"), strlen("union"), "ldw ");
+                substr += ", 0";
+            }
+
+            if (substr.find("static") != std::string::npos)
+            {
+                substr.replace(substr.find("static"), strlen("static"), "__export .data ");
+            }
+            else if (substr.find("extern") != std::string::npos)
+            {
+                substr.replace(substr.find("extern"), strlen("extern"), "__import ");
+
+                if (substr.find("__export .data") != std::string::npos)
+                    substr.erase(substr.find("__export .data"), strlen("__export .data"));
+            }
+
+            // a statement not seen before gets the next register.
+            auto var_to_find = std::find_if(kCompilerVariables.cbegin(), kCompilerVariables.cend(), [&](detail::CompilerType type) {
+                return type.fName.find(substr) != std::string::npos;
+            });
+
+            std::string reg = kAsmRegisterPrefix;
+            reg += std::to_string(kRegisterCounter);
+
+            if (var_to_find == kCompilerVariables.cend())
+            {
+                ++kRegisterCounter;
+
+                kState.kStackFrame.push_back({ .fName = substr, .fRegister = reg });
+                kCompilerVariables.push_back({ .fName = substr });
+            }
+
+            syntax_tree.fUserValue += substr;
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+            if (_text[text_index] == '=')
+                break;
+        }
+
+        // function handler.
+        // emits a call when inside braces, an exported symbol otherwise.
+
+        if (_text[text_index] == '(' &&
+            !function_found)
+        {
+            std::string substr;
+            std::string args_buffer;
+            std::string args;
+
+            bool type_crossed = false;
+
+            // this loop only walks up to the first ')'; it has no effect.
+            for (size_t idx = _text.find('(') + 1; idx < _text.size(); ++idx)
+            {
+                if (_text[idx] == ',')
+                    continue;
+
+                if (_text[idx] == ' ')
+                    continue;
+
+                if (_text[idx] == ')')
+                    break;
+            }
+
+            for (char substr_first_index : _text)
+            {
+                args_buffer += substr_first_index;
+
+                if (substr_first_index == ';')
+                {
+                    // NOTE(review): erase(find(...), 1) throws
+                    // std::out_of_range when the character is missing, for
+                    // instance a statement without ')'.
+                    args_buffer = args_buffer.erase(0, args_buffer.find('('));
+                    args_buffer = args_buffer.erase(args_buffer.find(';'), 1);
+                    args_buffer = args_buffer.erase(args_buffer.find(')'), 1);
+                    args_buffer = args_buffer.erase(args_buffer.find('('), 1);
+
+                    if (!args_buffer.empty())
+                        args += "\tldw r6, ";
+
+                    // index is never incremented, every further argument is
+                    // given the register number 0.
+                    std::size_t index = 0UL;
+
+                    while (ParserKit::find_word(args_buffer, ","))
+                    {
+                        std::string register_type = kRegisterPrefix;
+                        register_type += std::to_string(index);
+
+                        args_buffer.replace(args_buffer.find(','), 1, "\n\tldw " + register_type + ",");
+                    }
+
+                    args += args_buffer;
+                    args += "\n\tjb __import ";
+                }
+            }
+
+            // function name: the text before '(' with blanks removed; the
+            // first blank also discards what precedes it.
+            for (char _text_i : _text)
+            {
+                if (_text_i == '\t' ||
+                    _text_i == ' ')
+                {
+                    if (!type_crossed)
+                    {
+                        substr.clear();
+                        type_crossed = true;
+                    }
+
+                    continue;
+                }
+
+                if (_text_i == '(')
+                    break;
+
+                substr += _text_i;
+            }
+
+            if (kInBraces)
+            {
+                syntax_tree.fUserValue = args;
+
+                syntax_tree.fUserValue += substr;
+
+                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+                function_found = true;
+            }
+            else
+            {
+                syntax_tree.fUserValue.clear();
+
+                syntax_tree.fUserValue += "__export .text ";
+
+                syntax_tree.fUserValue += substr;
+                syntax_tree.fUserValue += "\n";
+
+                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+                function_found = true;
+            }
+
+            kCompilerFunctions.push_back(_text);
+        }
+
+        // struct/union/enum: a keyword at the scanned position without any
+        // ';' in the text opens a declaration body.
+        if (_text[text_index] == 's')
+        {
+            if (_text.find("struct") != text_index)
+                continue;
+
+            if (_text.find(";") == std::string::npos)
+                kInStruct = true;
+        }
+
+        if (_text[text_index] == 'u')
+        {
+            if (_text.find("union") != text_index)
+                continue;
+
+            if (_text.find(";") == std::string::npos)
+                kInStruct = true;
+        }
+
+        if (_text[text_index] == 'e')
+        {
+            if (_text.find("enum") != text_index)
+                continue;
+
+            if (_text.find(";") == std::string::npos)
+                kInStruct = true;
+        }
+
+        // decrement: "--" is removed and the rest is emitted after "dec ".
+        if (_text[text_index] == '-' &&
+            _text[text_index+1] == '-')
+        {
+            _text = _text.replace(_text.find("--"), strlen("--"), "");
+
+            // the index still advances after an erase, so the character that
+            // follows an erased blank is not examined.
+            for (int _text_i = 0; _text_i < _text.size(); ++_text_i)
+            {
+                if (_text[_text_i] == '\t' ||
+                    _text[_text_i] == ' ')
+                    _text.erase(_text_i, 1);
+            }
+
+            syntax_tree.fUserValue += "dec ";
+            syntax_tree.fUserValue += _text;
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+            break;
+        }
+
+        // while loop
+        if (_text[text_index] == 'w')
+        {
+            if (_text.find("while") == std::string::npos)
+                continue;
+
+            if (_text.find("while") != text_index)
+                continue;
+
+            // overwritten by the "beq " assignment a few lines below.
+            syntax_tree.fUserValue = "jrl [r32+0x04]";
+
+            std::string symbol_loop = "_loop_while_";
+            symbol_loop += std::to_string(time_off.raw);
+            symbol_loop += " ";
+
+            // compares the registers of the two most recent stack frame
+            // entries.
+            // NOTE(review): size() - 2 wraps around when kStackFrame holds
+            // fewer than two entries.
+            syntax_tree.fUserValue = "beq ";
+            syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 2].fRegister;
+            syntax_tree.fUserValue += ",";
+            syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 1].fRegister;
+            syntax_tree.fUserValue += ", __end%s\njb __continue%s\n__export .text __end%s\njlr\nvoid __export .text __continue%s\njb _L";
+            syntax_tree.fUserValue += std::to_string(kBracesCount + 1) + "_" + std::to_string(time_off.raw);
+
+            // every %s placeholder becomes the loop symbol.
+            while (syntax_tree.fUserValue.find("%s") != std::string::npos)
+            {
+                syntax_tree.fUserValue.replace(syntax_tree.fUserValue.find("%s"), strlen("%s"), symbol_loop);
+            }
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+            kOnWhileLoop = true;
+
+            break;
+        }
+
+        // for loop
+        if (_text[text_index] == 'f')
+        {
+            if (_text.find("for") == std::string::npos)
+                continue;
+
+            if (_text.find("for") != text_index)
+                continue;
+
+            syntax_tree.fUserValue = "jrl [r32+0x1]\n";
+
+            // actually set registers now.
+
+            // expr is not used afterwards.
+            auto expr = _text.substr(_text.find("for") + strlen("for"));
+
+            kLatestVar.clear();
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+            kOnForLoop = true;
+            break;
+        }
+
+        // increment: "++" is removed and the rest is emitted after "add ".
+        // unlike the "--" handler the scan goes on afterwards.
+        if (_text[text_index] == '+' &&
+            _text[text_index+1] == '+')
+        {
+            _text = _text.replace(_text.find("++"), strlen("++"), "");
+
+            for (int _text_i = 0; _text_i < _text.size(); ++_text_i)
+            {
+                if (_text[_text_i] == '\t' ||
+                    _text[_text_i] == ' ')
+                    _text.erase(_text_i, 1);
+            }
+
+            syntax_tree.fUserValue += "add ";
+            syntax_tree.fUserValue += _text;
+
+            if (syntax_tree.fUserValue.find(";") != std::string::npos)
+                syntax_tree.fUserValue.erase(syntax_tree.fUserValue.find(";"), 1);
+
+            kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+        }
+
+        // closing brace: leaves a block and emits the loop epilogue when the
+        // block belonged to a while/for loop.
+        if (_text[text_index] == '}')
+        {
+            kRegisterCounter = kStartUsable;
+
+            // NOTE(review): kBracesCount is a size_t, the decrement wraps
+            // around when it is already 0.
+            --kBracesCount;
+
+            if (kBracesCount < 1)
+            {
+                kInBraces = false;
+                kBracesCount = 0;
+            }
+
+            if (kInStruct)
+                kInStruct = false;
+
+            if (!kInBraces)
+            {
+                syntax_tree.fUserValue += kAddIfAnyEnd;
+
+                kAddIfAnyEnd.clear();
+
+                kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+            }
+            else
+            {
+                if (kOnWhileLoop ||
+                    kOnForLoop)
+                {
+                    if (kOnForLoop)
+                        kOnForLoop = false;
+
+                    if (kOnWhileLoop)
+                        kOnWhileLoop = false;
+
+                    std::string symbol_loop = "_loop_for_";
+                    symbol_loop += std::to_string(time_off.raw);
+                    symbol_loop += " ";
+
+                    // NOTE(review): size() - 2 wraps around when kStackFrame
+                    // holds fewer than two entries.
+                    syntax_tree.fUserValue = "beq ";
+                    syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 2].fRegister;
+                    syntax_tree.fUserValue += ",";
+                    syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 1].fRegister;
+                    syntax_tree.fUserValue += ", __end%s\njb __continue%s\n__export .text __end%s\njlr\nvoid __export .text __continue%s\njb _L";
+                    syntax_tree.fUserValue += std::to_string(kBracesCount + 1) + "_" + std::to_string(time_off.raw);
+
+                    while (syntax_tree.fUserValue.find("%s") != std::string::npos)
+                    {
+                        syntax_tree.fUserValue.replace(syntax_tree.fUserValue.find("%s"), strlen("%s"), symbol_loop);
+                    }
+
+                    kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+                }
+                else
+                {
+                    kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+                }
+            }
+        }
+
+        // the leaf is reused; start the next character with an empty value.
+        syntax_tree.fUserValue.clear();
+    }
+
+    // every translated text ends with a newline leaf.
+    syntax_tree.fUserValue = "\n";
+    kState.fSyntaxTree->fLeafList.push_back(syntax_tree);
+
+    return true;
+}
+
+static bool kShouldHaveBraces = false;
+static std::string kFnName;
+
+std::string CompilerBackendClang::Check(const char* text, const char* file)
+{
+    std::string err_str;
+    std::string ln = text;
+
+    if (ln.empty())
+    {
+        return err_str;
+    }
+
+    bool non_ascii_found = false;
+
+    for (int i = 0; i < ln.size(); ++i) {
+        if (isalnum(ln[i]))
+        {
+            non_ascii_found = true;
+            break;
+        }
+    }
+
+    if (kShouldHaveBraces &&
+        ln.find('{') != std::string::npos) {
+        kShouldHaveBraces = false;
+    }
+
+    if (!non_ascii_found)
+        return err_str;
+
+    size_t string_index = 1UL;
+
+    if (ln.find('\'') != std::string::npos)
+    {
+        string_index = ln.find('\'') + 1;
+
+        for (; string_index < ln.size(); ++string_index)
+        {
+            if (ln[string_index] == '\'')
+            {
+                if
(ln[string_index + 1] != ';') + { + ln.erase(string_index, 1); + } + + return err_str; + } + } + } + else if (ln.find('"') != std::string::npos) + { + string_index = ln.find('"') + 1; + + for (; string_index < ln.size(); ++string_index) + { + if (ln[string_index] == '"') + { + if (ln[string_index + 1] != ';') + { + ln.erase(string_index, 1); + } + else + { + break; + } + } + } + } + else if (ln.find('"') == std::string::npos && + ln.find('\'') == std::string::npos) + { + std::vector forbidden_words; + + forbidden_words.push_back("\\"); + forbidden_words.push_back("?"); + forbidden_words.push_back("@"); + forbidden_words.push_back("~"); + forbidden_words.push_back("::"); + forbidden_words.push_back("/*"); + forbidden_words.push_back("*/"); + + // add them to avoid stupid mistakes. + forbidden_words.push_back("namespace"); + forbidden_words.push_back("class"); + forbidden_words.push_back("extern \"C\""); + + for (auto& forbidden : forbidden_words) + { + if (ParserKit::find_word(ln, forbidden)) + { + err_str += "\nForbidden character detected: "; + err_str += forbidden; + + return err_str; + } + } + } + + struct CompilerVariableRange final + { + std::string fBegin; + std::string fEnd; + }; + + const std::vector variables_list = { + { .fBegin = "static ", .fEnd = "="}, + { .fBegin = "=", .fEnd = ";"}, + { .fBegin = "if(", .fEnd = "="}, + { .fBegin = "if (", .fEnd = "="}, + { .fBegin = "if(", .fEnd = "<"}, + { .fBegin = "if (", .fEnd = "<"}, + { .fBegin = "if(", .fEnd = ">"}, + { .fBegin = "if (", .fEnd = ">"}, + { .fBegin = "if(", .fEnd = ")"}, + { .fBegin = "if (", .fEnd = ")"}, + + { .fBegin = "else(", .fEnd = "="}, + { .fBegin = "else (", .fEnd = "="}, + { .fBegin = "else(", .fEnd = "<"}, + { .fBegin = "else (", .fEnd = "<"}, + { .fBegin = "else(", .fEnd = ">"}, + { .fBegin = "else (", .fEnd = ">"}, + { .fBegin = "else(", .fEnd = ")"}, + { .fBegin = "else (", .fEnd = ")"}, + }; + + for (auto& variable : variables_list) + { + if (ln.find(variable.fBegin) != 
std::string::npos) + { + string_index = ln.find(variable.fBegin) + variable.fBegin.size(); + + while (ln[string_index] == ' ') + ++string_index; + + std::string keyword; + + for (; string_index < ln.size(); ++string_index) + { + if (ln[string_index] == variable.fEnd[0]) + { + std::string varname = ""; + + for (size_t index_keyword = ln.find(' '); ln[index_keyword] != variable.fBegin[0]; + ++index_keyword) + { + if (ln[index_keyword] == ' ') + { + continue; + } + + if (isdigit(ln[index_keyword])) + { + goto cc_next_loop; + } + + varname += ln[index_keyword]; + } + + if (varname.find(' ') != std::string::npos) + { + varname.erase(0, varname.find(' ')); + + if (variable.fBegin == "extern") + { + varname.erase(0, varname.find(' ')); + } + } + + std::string reg = kAsmRegisterPrefix; + reg += std::to_string(kRegisterCounter); + + kCompilerVariables.push_back({ .fValue = varname }); + goto cc_check_done; + } + + keyword.push_back(ln[string_index]); + } + + goto cc_next_loop; + + cc_check_done: + + // skip digit value. 
+ if (isdigit(keyword[0]) || + keyword[0] == '"') + { + goto cc_next_loop; + } + + while (keyword.find(' ') != std::string::npos) + keyword.erase(keyword.find(' '), 1); + + for (auto& var : kCompilerVariables) + { + if (var.fValue.find(keyword) != std::string::npos) + { + err_str.clear(); + goto cc_next; + } + } + + for (auto& fn : kCompilerFunctions) + { + if (fn.find(keyword[0]) != std::string::npos) + { + auto where_begin = fn.find(keyword[0]); + auto keyword_begin = 0UL; + auto failed = false; + + for (; where_begin < keyword.size(); ++where_begin) + { + if (fn[where_begin] == '(' && + keyword[keyword_begin] == '(') + break; + + if (fn[where_begin] != keyword[keyword_begin]) + { + failed = true; + break; + } + + ++keyword_begin; + } + + if (!failed) + { + err_str.clear(); + goto cc_next; + } + else + { + continue; + } + } + } + +cc_error_value: + if (keyword.find("->") != std::string::npos) + return err_str; + + if (keyword.find(".") != std::string::npos) + return err_str; + + + if (isalnum(keyword[0])) + err_str += "\nUndefined value: " + keyword; + + return err_str; + } + +cc_next_loop: + continue; + } + +cc_next: + + // extern doesnt declare anything, it imports a variable. + // so that's why it's not declare upper. 
+ if (ParserKit::find_word(ln, "extern")) + { + auto substr = ln.substr(ln.find("extern") + strlen("extern")); + kCompilerVariables.push_back({ .fValue = substr }); + } + + if (kShouldHaveBraces && + ln.find('{') == std::string::npos) + { + err_str += "Missing '{' for function "; + err_str += kFnName; + err_str += "\n"; + + kShouldHaveBraces = false; + kFnName.clear(); + } + else if (kShouldHaveBraces && + ln.find('{') != std::string::npos) + { + kShouldHaveBraces = false; + kFnName.clear(); + } + + bool type_not_found = true; + + if (ln.find('\'') != std::string::npos) + { + ln.replace(ln.find('\''), 3, "0"); + } + + auto first = ln.find('"'); + if (first != std::string::npos) + { + auto second = 0UL; + bool found_second_quote = false; + + for (size_t i = first + 1; i < ln.size(); ++i) + { + if (ln[i] == '\"') + { + found_second_quote = true; + second = i; + + break; + } + } + + if (!found_second_quote) + { + err_str += "Missing terminating \"."; + err_str += " here -> " + ln.substr(ln.find('"'), second); + } + } + + if (ln.find(')') != std::string::npos && + ln.find(';') == std::string::npos) + { + if (ln.find('{') == std::string::npos) + { + kFnName = ln; + kShouldHaveBraces = true; + + goto skip_braces_check; + } + else if (ln.find('{') != std::string::npos) + { + kShouldHaveBraces = false; + } + } + +skip_braces_check: + + for (auto& key : kCompilerTypes) + { + if (ParserKit::find_word(ln, key.fName)) + { + if (isdigit(ln[ln.find(key.fName) + key.fName.size() + 1])) + { + err_str += "\nNumber cannot be set for "; + err_str += key.fName; + err_str += "'s name. 
here -> "; + err_str += ln; + } + + if (ln.find(key.fName) == 0 || + ln[ln.find(key.fName) - 1] == ' ' || + ln[ln.find(key.fName) - 1] == '\t') + { + type_not_found = false; + + if (ln[ln.find(key.fName) + key.fName.size()] != ' ') + { + type_not_found = true; + + if (ln[ln.find(key.fName) + key.fName.size()] == '\t') + type_not_found = false; + + goto next; + } + else if (ln[ln.find(key.fName) + key.fName.size()] != '\t') + { + type_not_found = true; + + if (ln[ln.find(key.fName) + key.fName.size()] == ' ') + type_not_found = false; + + } + } + +next: + + if (key.fName != "struct" || + key.fName != "enum" || + key.fName != "union") + { + if (ln.find(';') == std::string::npos) + { + if (ln.find('(') != std::string::npos) + { + if (ln.find('=') == std::string::npos) + continue; + } + + err_str += "\nMissing ';', here -> "; + err_str += ln; + } + else + { + continue; + } + + if (ln.find('=') != std::string::npos) + { + if (ln.find('(') != std::string::npos) + { + if (ln.find(')') == std::string::npos) + { + err_str += "\nMissing ')', after '(' here -> "; + err_str += ln.substr(ln.find('(')); + } + } + } + } + } + } + + if (kInBraces && + ln.find("struct") != std::string::npos && + ln.find("union") != std::string::npos && + ln.find("enum") != std::string::npos && + ln.find('=') != std::string::npos) + { + if (ln.find(';') == std::string::npos) + { + err_str += "\nMissing ';' after struct/union/enum declaration, here -> "; + err_str += ln; + } + } + + if (ln.find(';') != std::string::npos && + ln.find("for") == std::string::npos) + { + if (ln.find(';') + 1 != ln.size()) + { + for (int i = 0; i < ln.substr(ln.find(';') + 1).size(); ++i) + { + if ((ln.substr(ln.find(';') + 1)[i] != ' ') || + (ln.substr(ln.find(';') + 1)[i] != '\t')) + { + if (auto err = this->Check(ln.substr(ln.find(';') + 1).c_str(), file); + !err.empty()) + { + err_str += "\nUnexpected text after ';' -> "; + err_str += ln.substr(ln.find(';')); + err_str += err; + } + } + } + } + } + + if (ln.find('(') 
!= std::string::npos) + { + if (ln.find(';') == std::string::npos && + !ParserKit::find_word(ln, "|") && + !ParserKit::find_word(ln, "||") && + !ParserKit::find_word(ln, "&") && + !ParserKit::find_word(ln, "&&") && + !ParserKit::find_word(ln, "~")) + { + bool found_func = false; + size_t i = ln.find('('); + std::vector opens; + std::vector closes; + + for (; i < ln.size(); ++i) + { + if (ln[i] == ')') + { + closes.push_back(1); + } + + if (ln[i] == '(') + { + opens.push_back(1); + } + } + + if (closes.size() != opens.size()) + err_str += "Unterminated (), here -> " + ln; + + bool space_found = false; + + for (int i = 0; i < ln.size(); ++i) + { + if (ln[i] == ')' && + !space_found) + { + space_found = true; + continue; + } + + if (space_found) + { + if (ln[i] == ' ' && + isalnum(ln[i+1])) + { + err_str += "\nBad function format here -> "; + err_str += ln; + } + } + } + } + + if (ln.find('(') < 1) + { + err_str += "\nMissing identifier before '(' here -> "; + err_str += ln; + } + else + { + if (type_not_found && + ln.find(';') == std::string::npos && + ln.find("if") == std::string::npos && + ln.find("while") == std::string::npos && + ln.find("for") == std::string::npos && + ln.find("static") == std::string::npos && + ln.find("inline") == std::string::npos && + ln.find("|") == std::string::npos && + ln.find("&") == std::string::npos && + ln.find("(") == std::string::npos && + ln.find(")") == std::string::npos) + { + err_str += "\n Missing ';' or type, here -> "; + err_str += ln; + } + } + + if (ln.find(')') == std::string::npos) + { + err_str += "\nMissing ')', after '(' here -> "; + err_str += ln.substr(ln.find('(')); + } + } + else + { + if (ln.find("for") != std::string::npos || + ln.find("while") != std::string::npos) + { + err_str += "\nMissing '(', after \"for\", here -> "; + err_str += ln; + } + } + + if (ln.find('}') != std::string::npos && + !kInBraces) + { + if (!kInStruct && + ln.find(';') == std::string::npos) + { + err_str += "\nMismatched '}', here -> "; 
+ err_str += ln; + } + } + + if (!ln.empty()) + { + if (ln.find(';') == std::string::npos && + ln.find("struct") == std::string::npos && + ln.find("enum") == std::string::npos && + ln.find("union") == std::string::npos && + ln.find("for") == std::string::npos && + ln.find("while") == std::string::npos && + ln.find('{') == std::string::npos && + ln.find('}') == std::string::npos && + ln.find(')') == std::string::npos && + ln.find('(') == std::string::npos && + ln.find(',') == std::string::npos && + ln.find("typedef") == std::string::npos) + { + if (ln.size() <= 2) + return err_str; + + err_str += "\nMissing ';', here -> "; + err_str += ln; + } + } + + return err_str; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/** + * @brief C To Assembly mount-point. + */ + +///////////////////////////////////////////////////////////////////////////////////////// + +class AssemblyMountpointClang final : public CxxKit::AssemblyMountpoint +{ +public: + explicit AssemblyMountpointClang() = default; + ~AssemblyMountpointClang() override = default; + + CXXKIT_COPY_DEFAULT(AssemblyMountpointClang); + + [[maybe_unused]] static Int32 Arch() noexcept { return CxxKit::AssemblyFactory::kArchRISCV; } + + Int32 CompileToFormat(CxxKit::StringView& src, Int32 arch) override + { + if (arch != AssemblyMountpointClang::Arch()) + return -1; + + if (kCompilerBackend == nullptr) + return -1; + + /* @brief copy contents wihtout extension */ + std::string src_file = src.CData(); + std::ifstream src_fp = std::ifstream(src_file, std::ios::in); + std::string dest; + + for (auto& ch : src_file) + { + if (ch == '.') + { + break; + } + + dest += ch; + } + + /* According to pef abi. 
*/ + dest += kAsmFileExt64x0; + + kState.fOutputAssembly = std::make_unique(dest); + + auto fmt = CxxKit::current_date(); + + (*kState.fOutputAssembly) << "# Path: " << src_file << "\n"; + (*kState.fOutputAssembly) << "# Language: MP-UX Assembly\n"; + (*kState.fOutputAssembly) << "# Build Date: " << fmt << "\n\n"; + + ParserKit::SyntaxLeafList syntax; + + kState.fSyntaxTreeList.push_back(syntax); + kState.fSyntaxTree = &kState.fSyntaxTreeList[kState.fSyntaxTreeList.size() - 1]; + + std::string line_src; + + while (std::getline(src_fp, line_src)) + { + if (auto err = kCompilerBackend->Check(line_src.c_str(), src.CData()); + err.empty()) + { + kCompilerBackend->Compile(line_src.c_str(), src.CData()); + } + else + { + detail::print_error(err, src.CData()); + } + } + + if (kAcceptableErrors > 0) + return -1; + + std::vector keywords = { "ldw", "stw", "lda", "sta", "add", "dec", "mv"}; + + for (auto& leaf : kState.fSyntaxTree->fLeafList) + { + for (auto& keyword : keywords) + { + if (ParserKit::find_word(leaf.fUserValue, keyword)) + { + std::size_t cnt = 0UL; + + for (auto & reg : kState.kStackFrame) + { + std::string needle; + + for (size_t i = 0; i < reg.fName.size(); i++) + { + if (reg.fName[i] == ' ') + { + ++i; + + for (; i < reg.fName.size(); i++) + { + if (reg.fName[i] == ',') + { + break; + } + + if (reg.fName[i] == ' ') + continue; + + needle += reg.fName[i]; + } + + break; + } + } + + if (ParserKit::find_word(leaf.fUserValue, needle)) + { + leaf.fUserValue.replace(leaf.fUserValue.find(needle), + needle.size(), reg.fRegister); + + if (leaf.fUserValue.find("__import") != std::string::npos) + { + if (leaf.fUserValue.find("__import") < leaf.fUserValue.find(needle)) + { + leaf.fUserValue.erase(leaf.fUserValue.find("__import"), strlen("__import")); + } + } + + ++cnt; + } + } + + if (cnt > 1 && keyword != "mv" && keyword != "add" && keyword != "dec") + { + leaf.fUserValue.replace(leaf.fUserValue.find(keyword), keyword.size(), "mv"); + } + } + } + } + + for (auto& 
leaf : kState.fSyntaxTree->fLeafList) + { + (*kState.fOutputAssembly) << leaf.fUserValue; + } + + kState.fSyntaxTree = nullptr; + + kState.fOutputAssembly->flush(); + kState.fOutputAssembly.reset(); + + return kOk; + } + +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +#define kPrintF printf +#define kSplashCxx() kPrintF(kWhite "%s\n", "cc, v1.14, (c) Western Company") + +static void cc_print_help() +{ + kSplashCxx(); + + kPrintF(kWhite "--asm={MACHINE}: %s\n", "Compile with a specific syntax. (64x0, 32x0)"); + kPrintF(kWhite "--compiler={COMPILER}: %s\n", "Select compiler engine (builtin -> dolvik)."); +} + +///////////////////////////////////////////////////////////////////////////////////////// + +#define kExt ".c" + +int main(int argc, char** argv) +{ + kCompilerTypes.push_back({ .fName = "void", .fValue = "void" }); + kCompilerTypes.push_back({ .fName = "char", .fValue = "byte" }); + kCompilerTypes.push_back({ .fName = "short", .fValue = "hword" }); + kCompilerTypes.push_back({ .fName = "int", .fValue = "dword" }); + kCompilerTypes.push_back({ .fName = "long", .fValue = "qword" }); + + bool skip = false; + + for (auto index = 1UL; index < argc; ++index) + { + if (skip) + { + skip = false; + continue; + } + + if (argv[index][0] == '-') + { + if (strcmp(argv[index], "-v") == 0 || + strcmp(argv[index], "--version") == 0) + { + kSplashCxx(); + return kOk; + } + + if (strcmp(argv[index], "-verbose") == 0) + { + kState.kVerbose = true; + + continue; + } + + if (strcmp(argv[index], "-h") == 0 || + strcmp(argv[index], "--help") == 0) + { + cc_print_help(); + + return kOk; + } + + if (strcmp(argv[index], "--dialect") == 0) + { + if (kCompilerBackend) + std::cout << kCompilerBackend->Language() << "\n"; + + return kOk; + } + + if (strcmp(argv[index], "--asm=masm") == 0) + { + delete kFactory.Unmount(); + + kFactory.Mount(new AssemblyMountpointClang()); + kMachine = CxxKit::AssemblyFactory::kArchRISCV; + + continue; + } 
+ + if (strcmp(argv[index], "--compiler=dolvik") == 0) + { + if (!kCompilerBackend) + kCompilerBackend = new CompilerBackendClang(); + + continue; + } + + if (strcmp(argv[index], "-fmax-exceptions") == 0) + { + try + { + kErrorLimit = std::strtol(argv[index + 1], nullptr, 10); + } + // catch anything here + catch (...) + { + kErrorLimit = 0; + } + + skip = true; + + continue; + } + + std::string err = "Unknown command: "; + err += argv[index]; + + detail::print_error(err, "cc"); + + continue; + } + + kFileList.emplace_back(argv[index]); + + CxxKit::StringView srcFile = CxxKit::StringBuilder::Construct(argv[index]); + + if (strstr(argv[index], kExt) == nullptr) + { + if (kState.kVerbose) + { + std::cerr << argv[index] << " is not a valid C line_src.\n"; + } + + return -1; + } + + if (kFactory.Compile(srcFile, kMachine) != kOk) + return -1; + } + + return kOk; +} diff --git a/CompilerDriver/cc.cxx b/CompilerDriver/cc.cxx deleted file mode 100644 index 0a8647d..0000000 --- a/CompilerDriver/cc.cxx +++ /dev/null @@ -1,1883 +0,0 @@ -/* - * ======================================================== - * - * cc - * Copyright Western Company, all rights reserved. - * - * ======================================================== - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define kOk 0 - -/* Optimized C driver */ -/* This is part of MP-UX C SDK. 
*/ -/* (c) Western Company */ - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" - -///////////////////////////////////// - -// INTERNAL STUFF OF THE C COMPILER - -///////////////////////////////////// - -namespace detail -{ - struct CompilerRegisterMap - { - std::string fName; - std::string fRegister; - }; - - struct CompilerState - { - std::vector fSyntaxTreeList; - std::vector kStackFrame; - ParserKit::SyntaxLeafList* fSyntaxTree{ nullptr }; - std::unique_ptr fOutputAssembly; - std::string fLastFile; - std::string fLastError; - bool kVerbose; - }; -} - -static detail::CompilerState kState; -static SizeType kErrorLimit = 100; - -static Int32 kAcceptableErrors = 0; - -namespace detail -{ - void print_error(std::string reason, std::string file) noexcept - { - if (reason[0] == '\n') - reason.erase(0, 1); - - if (file.find(".pp") != std::string::npos) - { - file.erase(file.find(".pp"), 3); - } - - if (kState.fLastFile != file) - { - std::cout << kRed << "[ cc ] " << kWhite << ((file == "cc") ? "internal compiler error " : ("in file, " + file)) << kBlank << std::endl; - std::cout << kRed << "[ cc ] " << kWhite << reason << kBlank << std::endl; - - kState.fLastFile = file; - } - else - { - std::cout << kRed << "[ cc ] [ " << kState.fLastFile << " ] " << kWhite << reason << kBlank << std::endl; - } - - if (kAcceptableErrors > kErrorLimit) - std::exit(3); - - ++kAcceptableErrors; - } - - struct CompilerType - { - std::string fName; - std::string fValue; - }; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// Target architecture. 
-static int kMachine = 0; - -///////////////////////////////////////// - -// REGISTERS ACCORDING TO USED ASSEMBLER - -///////////////////////////////////////// - -static size_t kRegisterCnt = kAsmRegisterLimit; -static size_t kStartUsable = 6; -static size_t kUsableLimit = 14; -static size_t kRegisterCounter = kStartUsable; -static std::string kRegisterPrefix = kAsmRegisterPrefix; - -///////////////////////////////////////// - -// COMPILER PARSING UTILITIES/STATES. - -///////////////////////////////////////// - -static std::vector kFileList; -static CxxKit::AssemblyFactory kFactory; -static bool kInStruct = false; -static bool kOnWhileLoop = false; -static bool kOnForLoop = false; -static bool kInBraces = false; -static size_t kBracesCount = 0UL; - -/* @brief C compiler backend for Optimized C */ -class CompilerBackendClang final : public ParserKit::CompilerBackend -{ -public: - explicit CompilerBackendClang() = default; - ~CompilerBackendClang() override = default; - - CXXKIT_COPY_DEFAULT(CompilerBackendClang); - - std::string Check(const char* text, const char* file); - bool Compile(const std::string& text, const char* file) override; - - const char* Language() override { return "Optimized 64x0 C"; } - -}; - -static CompilerBackendClang* kCompilerBackend = nullptr; -static std::vector kCompilerVariables; -static std::vector kCompilerFunctions; -static std::vector kCompilerTypes; - -// @brief this hook code before the begin/end command. 
-static std::string kAddIfAnyBegin; -static std::string kAddIfAnyEnd; -static std::string kLatestVar; - -// \brief parse a function call -static std::string cc_parse_function_call(std::string& _text) -{ - if (_text[0] == '(') - { - std::string substr; - std::string args_buffer; - std::string args; - - bool type_crossed = false; - - for (char substr_first_index: _text) - { - args_buffer += substr_first_index; - - if (substr_first_index == ';') - { - args_buffer = args_buffer.erase(0, args_buffer.find('(')); - args_buffer = args_buffer.erase(args_buffer.find(';'), 1); - args_buffer = args_buffer.erase(args_buffer.find(')'), 1); - args_buffer = args_buffer.erase(args_buffer.find('('), 1); - - if (!args_buffer.empty()) - args += "\tpsh "; - - while (args_buffer.find(',') != std::string::npos) - { - args_buffer.replace(args_buffer.find(','), 1, "\n\tpsh "); - } - - args += args_buffer; - args += "\n\tjb __import "; - } - } - - return args; - } - - return ""; -} - -namespace detail -{ - union number_cast - { - number_cast(UInt64 raw) - : raw(raw) - {} - - char number[8]; - UInt64 raw; - }; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name Compile -// @brief Generate MASM from a C assignement. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerBackendClang::Compile(const std::string& text, const char* file) -{ - std::string _text = text; - - auto syntax_tree = ParserKit::SyntaxLeafList::SyntaxLeaf(); - bool type_found = false; - bool function_found = false; - - // start parsing - for (size_t text_index = 0; text_index < _text.size(); ++text_index) - { - uuid_t out{0}; - - uuid_generate_random(out); - detail::number_cast time_off = (UInt64)out; - - if (!type_found) - { - auto substr = _text.substr(text_index); - std::string match_type; - - for (size_t y = 0; y < substr.size(); ++y) - { - if (substr[y] == ' ') - { - while (match_type.find(' ') != std::string::npos) { - match_type.erase(match_type.find(' ')); - } - - for (auto& clType : kCompilerTypes) - { - if (clType.fName == match_type) - { - match_type.clear(); - - std::string buf; - - buf += clType.fValue; - buf += ' '; - - if (clType.fName == "struct" || - clType.fName == "union") - { - for (size_t a = y + 1; a < substr.size(); a++) - { - if (substr[a] == ' ') - { - break; - } - - if (substr[a] == '\n') - break; - - buf += substr[a]; - } - } - - if (substr.find('=') != std::string::npos) - { - break; - } - - if (_text.find('(') != std::string::npos) - { - syntax_tree.fUserValue = buf; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - - type_found = true; - break; - } - } - - break; - } - - match_type += substr[y]; - } - } - - if (_text[text_index] == '{') - { - if (kInStruct) - { - continue; - } - - kInBraces = true; - ++kBracesCount; - - if (kOnWhileLoop || - kOnForLoop) - { - syntax_tree.fUserValue = "void __export .text _L"; - syntax_tree.fUserValue += std::to_string(kBracesCount) + "_" + std::to_string(time_off.raw); - } - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - - // return keyword handler - if (_text[text_index] == 'r') - { - std::string return_keyword; - return_keyword += "return"; - - std::size_t index 
= 0UL; - - std::string value; - - for (size_t return_index = text_index; return_index < _text.size(); ++return_index) - { - if (_text[return_index] != return_keyword[index]) - { - for (size_t value_index = return_index; value_index < _text.size(); ++value_index) - { - if (_text[value_index] == ';') - break; - - value += _text[value_index]; - } - - break; - } - - ++index; - } - - if (index == return_keyword.size()) - { - if (!value.empty()) - { - if (value.find('(') != std::string::npos) - { - value.erase(value.find('(')); - } - - if (!isdigit(value[value.find('(') + 2])) - { - std::string tmp = value; - bool reg_to_reg = false; - - value.clear(); - - value += " __import"; - value += tmp; - } - - syntax_tree.fUserValue = "\tldw r19, "; - - // make it pretty. - if (value.find('\t') != std::string::npos) - value.erase(value.find('\t'), 1); - - syntax_tree.fUserValue += value + "\n"; - } - - syntax_tree.fUserValue += "\tjlr"; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - break; - } - } - - if (_text[text_index] == 'i' && - _text[text_index + 1] == 'f') - { - std::string format = "ldw r15, %s\nldw r16, %s2\n"; - std::string expr = format; - - if (ParserKit::find_word(_text, "==")) - { - expr += "\nbeq"; - } - - if (ParserKit::find_word(_text, "!=")) - { - expr += "\nbneq"; - } - - if (ParserKit::find_word(_text, ">=")) - { - expr += "\nbge"; - } - else if (ParserKit::find_word(_text, ">")) - { - expr += "\nbg"; - } - - if (ParserKit::find_word(_text, "<=")) - { - expr += "\nble"; - } - else if (ParserKit::find_word(_text, "<")) - { - expr += "\nbl"; - } - - std::string substr = expr; - - std::string buf; - - for (size_t text_index_2 = (_text.find("if") + std::string("if").size()); text_index_2 < _text.size(); ++text_index_2) - { - if (_text[text_index_2] == ';') - { - buf.clear(); - - for (size_t text_index_3 = text_index_2 + 1; text_index_3 < _text.size(); text_index_3++) - { - if (_text[text_index_3] == '{') - continue; - - if (_text[text_index_3] == 
'}') - continue; - - if (_text[text_index_3] == ' ') - continue; - - if (_text[text_index_3] == '=') - continue; - - if (_text[text_index_3] == '<' && - _text[text_index_3+1] == '=' || - _text[text_index_3] == '=' && - _text[text_index_3+1] == '=' || - _text[text_index_3] == '>' && - _text[text_index_3+1] == '=' || - _text[text_index_3] == '>' || - _text[text_index_3] == '<' && - _text[text_index_3+1] == '=' || - _text[text_index_3] == '!') - { - buf += ", "; - continue; - } - else if (_text[text_index_3] == '=') - { - continue; - } - - buf += _text[text_index_3]; - } - - break; - } - - if (_text[text_index_2] == '{') - continue; - - if (_text[text_index_2] == '}') - continue; - - if (_text[text_index_2] == '<' && - _text[text_index_2+1] == '=' || - _text[text_index_2] == '=' && - _text[text_index_2+1] == '=' || - _text[text_index_2] == '>' && - _text[text_index_2+1] == '=' || - _text[text_index_2] == '>' || - _text[text_index_2] == '<' && - _text[text_index_2+1] == '=' || - _text[text_index_2] == '!') - { - buf += ", "; - continue; - } - else if (_text[text_index_2] == '=') - { - continue; - } - - buf += _text[text_index_2]; - } - - if (buf.find(",") == std::string::npos && - buf.find("(") != std::string::npos && - buf.find(")") != std::string::npos ) - { - - std::string cond = buf.substr(buf.find("(") + 1, buf.find(")") - 1); - cond.erase(cond.find("(")); - - std::string cond2 = buf.substr(buf.find("(") + 1, buf.find(")") - 1); - cond2.erase(cond2.find(")")); - - substr.replace(substr.find("%s"), 2, cond); - substr.replace(substr.find("%s2"), 3, cond2); - - buf.replace(buf.find(cond), cond.size(), "r15"); - buf.replace(buf.find(cond2), cond2.size(), "r16"); - - substr += buf; - - syntax_tree.fUserValue = substr + "\n"; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - break; - } - else - { - continue; - } - - // dealing with pointer - if (buf.find("*") != std::string::npos) - { - buf.erase(buf.find("*"), 1); - } - - std::string cond = 
buf.substr(buf.find("(") + 1, buf.find(",") - 1); - cond.erase(cond.find(",")); - - std::string cond2 = buf.substr(buf.find(",") + 1, buf.find(")") - 1); - cond2.erase(cond2.find(")")); - - substr.replace(substr.find("%s"), 2, cond); - substr.replace(substr.find("%s2"), 3, cond2); - - buf.replace(buf.find(cond), cond.size(), "r15"); - buf.replace(buf.find(cond2), cond2.size(), "r16"); - - substr += buf; - - syntax_tree.fUserValue = substr + "\n"; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - break; - } - - // Parse expressions and instructions here. - // what does this mean? - // we encounter an assignment, or we reached the end of an expression. - if (_text[text_index] == '=' || - _text[text_index] == ';') - { - if (function_found) - continue; - - if (_text[text_index] == ';' && - kInStruct) - continue; - - if (_text.find("typedef ") != std::string::npos) - continue; - - if (_text[text_index] == '=' && - kInStruct) - { - continue; - } - - if (_text[text_index+1] == '=' || - _text[text_index-1] == '!' || - _text[text_index-1] == '<' || - _text[text_index-1] == '>') - { - continue; - } - - std::string substr; - - if (_text.find('=') != std::string::npos && - kInBraces) - { - if (_text.find("*") != std::string::npos) - { - if (_text.find("=") > _text.find("*")) - substr += "\tlda "; - else - substr += "\tldw "; - } - else - { - substr += "\tldw "; - } - } - else if (_text.find('=') != std::string::npos && - !kInBraces) - { - substr += "stw __export .data "; - } - - int first_encountered = 0; - - std::string str_name; - - for (size_t text_index_2 = 0; text_index_2 < _text.size(); ++text_index_2) - { - if (_text[text_index_2] == '\"') - { - ++text_index_2; - - // want to add this, so that the parser recognizes that this is a string. 
- substr += '"'; - - for (; text_index_2 < _text.size(); ++text_index_2) - { - if (_text[text_index_2] == '\"') - break; - - kLatestVar += _text[text_index_2]; - substr += _text[text_index_2]; - } - } - - if (_text[text_index_2] == '{' || - _text[text_index_2] == '}') - continue; - - if (_text[text_index_2] == ';') - { - break; - } - - if (_text[text_index_2] == ' ' || - _text[text_index_2] == '\t') - { - if (first_encountered != 2) - { - if (_text[text_index] != '=' && - substr.find("__export .data") == std::string::npos && - !kInStruct && - _text.find("struct") == std::string::npos && - _text.find("extern") == std::string::npos && - _text.find("union") == std::string::npos && - _text.find("typedef") == std::string::npos) - substr += "__export .data "; - } - - ++first_encountered; - - continue; - } - - if (_text[text_index_2] == '=') - { - if (!kInBraces) - { - substr.replace(substr.find("__export .data"), strlen("__export .data"), "__export .page_zero "); - } - - substr += ","; - continue; - } - - kLatestVar += _text[text_index_2]; - substr += _text[text_index_2]; - } - - for (auto& clType : kCompilerTypes) - { - if (substr.find(clType.fName) != std::string::npos) - { - if (substr.find(clType.fName) > substr.find('"')) - continue; - - substr.erase(substr.find(clType.fName), clType.fName.size()); - } - else if (substr.find(clType.fValue) != std::string::npos) - { - if (substr.find(clType.fValue) > substr.find('"')) - continue; - - if (clType.fName == "const") - continue; - - substr.erase(substr.find(clType.fValue), clType.fValue.size()); - } - } - - if (substr.find("struct") != std::string::npos) - { - substr.replace(substr.find("struct"), strlen("struct"), "ldw "); - substr += ", 0"; - } - - if (substr.find("union") != std::string::npos) - { - substr.replace(substr.find("union"), strlen("union"), "ldw "); - substr += ", 0"; - } - - if (substr.find("static") != std::string::npos) - { - substr.replace(substr.find("static"), strlen("static"), "__export .data "); - } 
- else if (substr.find("extern") != std::string::npos) - { - substr.replace(substr.find("extern"), strlen("extern"), "__import "); - - if (substr.find("__export .data") != std::string::npos) - substr.erase(substr.find("__export .data"), strlen("__export .data")); - } - - auto var_to_find = std::find_if(kCompilerVariables.cbegin(), kCompilerVariables.cend(), [&](detail::CompilerType type) { - return type.fName.find(substr) != std::string::npos; - }); - - std::string reg = kAsmRegisterPrefix; - reg += std::to_string(kRegisterCounter); - - if (var_to_find == kCompilerVariables.cend()) - { - ++kRegisterCounter; - - kState.kStackFrame.push_back({ .fName = substr, .fRegister = reg }); - kCompilerVariables.push_back({ .fName = substr }); - } - - syntax_tree.fUserValue += substr; - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - if (_text[text_index] == '=') - break; - } - - // function handler. - - if (_text[text_index] == '(' && - !function_found) - { - std::string substr; - std::string args_buffer; - std::string args; - - bool type_crossed = false; - - for (size_t idx = _text.find('(') + 1; idx < _text.size(); ++idx) - { - if (_text[idx] == ',') - continue; - - if (_text[idx] == ' ') - continue; - - if (_text[idx] == ')') - break; - } - - for (char substr_first_index : _text) - { - args_buffer += substr_first_index; - - if (substr_first_index == ';') - { - args_buffer = args_buffer.erase(0, args_buffer.find('(')); - args_buffer = args_buffer.erase(args_buffer.find(';'), 1); - args_buffer = args_buffer.erase(args_buffer.find(')'), 1); - args_buffer = args_buffer.erase(args_buffer.find('('), 1); - - if (!args_buffer.empty()) - args += "\tldw r6, "; - - std::size_t index = 0UL; - - while (ParserKit::find_word(args_buffer, ",")) - { - std::string register_type = kRegisterPrefix; - register_type += std::to_string(index); - - args_buffer.replace(args_buffer.find(','), 1, "\n\tldw " + register_type + ","); - } - - args += args_buffer; - args += "\n\tjb __import "; - 
} - } - - for (char _text_i : _text) - { - if (_text_i == '\t' || - _text_i == ' ') - { - if (!type_crossed) - { - substr.clear(); - type_crossed = true; - } - - continue; - } - - if (_text_i == '(') - break; - - substr += _text_i; - } - - if (kInBraces) - { - syntax_tree.fUserValue = args; - - syntax_tree.fUserValue += substr; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - function_found = true; - } - else - { - syntax_tree.fUserValue.clear(); - - syntax_tree.fUserValue += "__export .text "; - - syntax_tree.fUserValue += substr; - syntax_tree.fUserValue += "\n"; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - function_found = true; - } - - kCompilerFunctions.push_back(_text); - } - - if (_text[text_index] == 's') - { - if (_text.find("struct") != text_index) - continue; - - if (_text.find(";") == std::string::npos) - kInStruct = true; - } - - if (_text[text_index] == 'u') - { - if (_text.find("union") != text_index) - continue; - - if (_text.find(";") == std::string::npos) - kInStruct = true; - } - - if (_text[text_index] == 'e') - { - if (_text.find("enum") != text_index) - continue; - - if (_text.find(";") == std::string::npos) - kInStruct = true; - } - - if (_text[text_index] == '-' && - _text[text_index+1] == '-') - { - _text = _text.replace(_text.find("--"), strlen("--"), ""); - - for (int _text_i = 0; _text_i < _text.size(); ++_text_i) - { - if (_text[_text_i] == '\t' || - _text[_text_i] == ' ') - _text.erase(_text_i, 1); - } - - syntax_tree.fUserValue += "dec "; - syntax_tree.fUserValue += _text; - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - break; - } - - // while loop - if (_text[text_index] == 'w') - { - if (_text.find("while") == std::string::npos) - continue; - - if (_text.find("while") != text_index) - continue; - - syntax_tree.fUserValue = "jrl [r32+0x04]"; - - std::string symbol_loop = "_loop_while_"; - symbol_loop += std::to_string(time_off.raw); - symbol_loop += " "; - - syntax_tree.fUserValue = "beq "; - 
syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 2].fRegister; - syntax_tree.fUserValue += ","; - syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 1].fRegister; - syntax_tree.fUserValue += ", __end%s\njb __continue%s\n__export .text __end%s\njlr\nvoid __export .text __continue%s\njb _L"; - syntax_tree.fUserValue += std::to_string(kBracesCount + 1) + "_" + std::to_string(time_off.raw); - - while (syntax_tree.fUserValue.find("%s") != std::string::npos) - { - syntax_tree.fUserValue.replace(syntax_tree.fUserValue.find("%s"), strlen("%s"), symbol_loop); - } - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - kOnWhileLoop = true; - - break; - } - - if (_text[text_index] == 'f') - { - if (_text.find("for") == std::string::npos) - continue; - - if (_text.find("for") != text_index) - continue; - - syntax_tree.fUserValue = "jrl [r32+0x1]\n"; - - // actually set registers now. - - auto expr = _text.substr(_text.find("for") + strlen("for")); - - kLatestVar.clear(); - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - kOnForLoop = true; - break; - } - - if (_text[text_index] == '+' && - _text[text_index+1] == '+') - { - _text = _text.replace(_text.find("++"), strlen("++"), ""); - - for (int _text_i = 0; _text_i < _text.size(); ++_text_i) - { - if (_text[_text_i] == '\t' || - _text[_text_i] == ' ') - _text.erase(_text_i, 1); - } - - syntax_tree.fUserValue += "add "; - syntax_tree.fUserValue += _text; - - if (syntax_tree.fUserValue.find(";") != std::string::npos) - syntax_tree.fUserValue.erase(syntax_tree.fUserValue.find(";"), 1); - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - - if (_text[text_index] == '}') - { - kRegisterCounter = kStartUsable; - - --kBracesCount; - - if (kBracesCount < 1) - { - kInBraces = false; - kBracesCount = 0; - } - - if (kInStruct) - kInStruct = false; - - if (!kInBraces) - { - syntax_tree.fUserValue += kAddIfAnyEnd; - - kAddIfAnyEnd.clear(); - - 
kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - else - { - if (kOnWhileLoop || - kOnForLoop) - { - if (kOnForLoop) - kOnForLoop = false; - - if (kOnWhileLoop) - kOnWhileLoop = false; - - std::string symbol_loop = "_loop_for_"; - symbol_loop += std::to_string(time_off.raw); - symbol_loop += " "; - - syntax_tree.fUserValue = "beq "; - syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 2].fRegister; - syntax_tree.fUserValue += ","; - syntax_tree.fUserValue += kState.kStackFrame[kState.kStackFrame.size() - 1].fRegister; - syntax_tree.fUserValue += ", __end%s\njb __continue%s\n__export .text __end%s\njlr\nvoid __export .text __continue%s\njb _L"; - syntax_tree.fUserValue += std::to_string(kBracesCount + 1) + "_" + std::to_string(time_off.raw); - - while (syntax_tree.fUserValue.find("%s") != std::string::npos) - { - syntax_tree.fUserValue.replace(syntax_tree.fUserValue.find("%s"), strlen("%s"), symbol_loop); - } - - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - else - { - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - } - } - } - - syntax_tree.fUserValue.clear(); - } - - syntax_tree.fUserValue = "\n"; - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - return true; -} - -static bool kShouldHaveBraces = false; -static std::string kFnName; - -std::string CompilerBackendClang::Check(const char* text, const char* file) -{ - std::string err_str; - std::string ln = text; - - if (ln.empty()) - { - return err_str; - } - - bool non_ascii_found = false; - - for (int i = 0; i < ln.size(); ++i) { - if (isalnum(ln[i])) - { - non_ascii_found = true; - break; - } - } - - if (kShouldHaveBraces && - ln.find('{') != std::string::npos) { - kShouldHaveBraces = false; - } - - if (!non_ascii_found) - return err_str; - - size_t string_index = 1UL; - - if (ln.find('\'') != std::string::npos) - { - string_index = ln.find('\'') + 1; - - for (; string_index < ln.size(); ++string_index) - { - if (ln[string_index] == '\'') - { - if 
(ln[string_index + 1] != ';') - { - ln.erase(string_index, 1); - } - - return err_str; - } - } - } - else if (ln.find('"') != std::string::npos) - { - string_index = ln.find('"') + 1; - - for (; string_index < ln.size(); ++string_index) - { - if (ln[string_index] == '"') - { - if (ln[string_index + 1] != ';') - { - ln.erase(string_index, 1); - } - else - { - break; - } - } - } - } - else if (ln.find('"') == std::string::npos && - ln.find('\'') == std::string::npos) - { - std::vector forbidden_words; - - forbidden_words.push_back("\\"); - forbidden_words.push_back("?"); - forbidden_words.push_back("@"); - forbidden_words.push_back("~"); - forbidden_words.push_back("::"); - forbidden_words.push_back("/*"); - forbidden_words.push_back("*/"); - - // add them to avoid stupid mistakes. - forbidden_words.push_back("namespace"); - forbidden_words.push_back("class"); - forbidden_words.push_back("extern \"C\""); - - for (auto& forbidden : forbidden_words) - { - if (ParserKit::find_word(ln, forbidden)) - { - err_str += "\nForbidden character detected: "; - err_str += forbidden; - - return err_str; - } - } - } - - struct CompilerVariableRange final - { - std::string fBegin; - std::string fEnd; - }; - - const std::vector variables_list = { - { .fBegin = "static ", .fEnd = "="}, - { .fBegin = "=", .fEnd = ";"}, - { .fBegin = "if(", .fEnd = "="}, - { .fBegin = "if (", .fEnd = "="}, - { .fBegin = "if(", .fEnd = "<"}, - { .fBegin = "if (", .fEnd = "<"}, - { .fBegin = "if(", .fEnd = ">"}, - { .fBegin = "if (", .fEnd = ">"}, - { .fBegin = "if(", .fEnd = ")"}, - { .fBegin = "if (", .fEnd = ")"}, - - { .fBegin = "else(", .fEnd = "="}, - { .fBegin = "else (", .fEnd = "="}, - { .fBegin = "else(", .fEnd = "<"}, - { .fBegin = "else (", .fEnd = "<"}, - { .fBegin = "else(", .fEnd = ">"}, - { .fBegin = "else (", .fEnd = ">"}, - { .fBegin = "else(", .fEnd = ")"}, - { .fBegin = "else (", .fEnd = ")"}, - }; - - for (auto& variable : variables_list) - { - if (ln.find(variable.fBegin) != 
std::string::npos) - { - string_index = ln.find(variable.fBegin) + variable.fBegin.size(); - - while (ln[string_index] == ' ') - ++string_index; - - std::string keyword; - - for (; string_index < ln.size(); ++string_index) - { - if (ln[string_index] == variable.fEnd[0]) - { - std::string varname = ""; - - for (size_t index_keyword = ln.find(' '); ln[index_keyword] != variable.fBegin[0]; - ++index_keyword) - { - if (ln[index_keyword] == ' ') - { - continue; - } - - if (isdigit(ln[index_keyword])) - { - goto cc_next_loop; - } - - varname += ln[index_keyword]; - } - - if (varname.find(' ') != std::string::npos) - { - varname.erase(0, varname.find(' ')); - - if (variable.fBegin == "extern") - { - varname.erase(0, varname.find(' ')); - } - } - - std::string reg = kAsmRegisterPrefix; - reg += std::to_string(kRegisterCounter); - - kCompilerVariables.push_back({ .fValue = varname }); - goto cc_check_done; - } - - keyword.push_back(ln[string_index]); - } - - goto cc_next_loop; - - cc_check_done: - - // skip digit value. 
- if (isdigit(keyword[0]) || - keyword[0] == '"') - { - goto cc_next_loop; - } - - while (keyword.find(' ') != std::string::npos) - keyword.erase(keyword.find(' '), 1); - - for (auto& var : kCompilerVariables) - { - if (var.fValue.find(keyword) != std::string::npos) - { - err_str.clear(); - goto cc_next; - } - } - - for (auto& fn : kCompilerFunctions) - { - if (fn.find(keyword[0]) != std::string::npos) - { - auto where_begin = fn.find(keyword[0]); - auto keyword_begin = 0UL; - auto failed = false; - - for (; where_begin < keyword.size(); ++where_begin) - { - if (fn[where_begin] == '(' && - keyword[keyword_begin] == '(') - break; - - if (fn[where_begin] != keyword[keyword_begin]) - { - failed = true; - break; - } - - ++keyword_begin; - } - - if (!failed) - { - err_str.clear(); - goto cc_next; - } - else - { - continue; - } - } - } - -cc_error_value: - if (keyword.find("->") != std::string::npos) - return err_str; - - if (keyword.find(".") != std::string::npos) - return err_str; - - - if (isalnum(keyword[0])) - err_str += "\nUndefined value: " + keyword; - - return err_str; - } - -cc_next_loop: - continue; - } - -cc_next: - - // extern doesnt declare anything, it imports a variable. - // so that's why it's not declare upper. 
- if (ParserKit::find_word(ln, "extern")) - { - auto substr = ln.substr(ln.find("extern") + strlen("extern")); - kCompilerVariables.push_back({ .fValue = substr }); - } - - if (kShouldHaveBraces && - ln.find('{') == std::string::npos) - { - err_str += "Missing '{' for function "; - err_str += kFnName; - err_str += "\n"; - - kShouldHaveBraces = false; - kFnName.clear(); - } - else if (kShouldHaveBraces && - ln.find('{') != std::string::npos) - { - kShouldHaveBraces = false; - kFnName.clear(); - } - - bool type_not_found = true; - - if (ln.find('\'') != std::string::npos) - { - ln.replace(ln.find('\''), 3, "0"); - } - - auto first = ln.find('"'); - if (first != std::string::npos) - { - auto second = 0UL; - bool found_second_quote = false; - - for (size_t i = first + 1; i < ln.size(); ++i) - { - if (ln[i] == '\"') - { - found_second_quote = true; - second = i; - - break; - } - } - - if (!found_second_quote) - { - err_str += "Missing terminating \"."; - err_str += " here -> " + ln.substr(ln.find('"'), second); - } - } - - if (ln.find(')') != std::string::npos && - ln.find(';') == std::string::npos) - { - if (ln.find('{') == std::string::npos) - { - kFnName = ln; - kShouldHaveBraces = true; - - goto skip_braces_check; - } - else if (ln.find('{') != std::string::npos) - { - kShouldHaveBraces = false; - } - } - -skip_braces_check: - - for (auto& key : kCompilerTypes) - { - if (ParserKit::find_word(ln, key.fName)) - { - if (isdigit(ln[ln.find(key.fName) + key.fName.size() + 1])) - { - err_str += "\nNumber cannot be set for "; - err_str += key.fName; - err_str += "'s name. 
here -> "; - err_str += ln; - } - - if (ln.find(key.fName) == 0 || - ln[ln.find(key.fName) - 1] == ' ' || - ln[ln.find(key.fName) - 1] == '\t') - { - type_not_found = false; - - if (ln[ln.find(key.fName) + key.fName.size()] != ' ') - { - type_not_found = true; - - if (ln[ln.find(key.fName) + key.fName.size()] == '\t') - type_not_found = false; - - goto next; - } - else if (ln[ln.find(key.fName) + key.fName.size()] != '\t') - { - type_not_found = true; - - if (ln[ln.find(key.fName) + key.fName.size()] == ' ') - type_not_found = false; - - } - } - -next: - - if (key.fName != "struct" || - key.fName != "enum" || - key.fName != "union") - { - if (ln.find(';') == std::string::npos) - { - if (ln.find('(') != std::string::npos) - { - if (ln.find('=') == std::string::npos) - continue; - } - - err_str += "\nMissing ';', here -> "; - err_str += ln; - } - else - { - continue; - } - - if (ln.find('=') != std::string::npos) - { - if (ln.find('(') != std::string::npos) - { - if (ln.find(')') == std::string::npos) - { - err_str += "\nMissing ')', after '(' here -> "; - err_str += ln.substr(ln.find('(')); - } - } - } - } - } - } - - if (kInBraces && - ln.find("struct") != std::string::npos && - ln.find("union") != std::string::npos && - ln.find("enum") != std::string::npos && - ln.find('=') != std::string::npos) - { - if (ln.find(';') == std::string::npos) - { - err_str += "\nMissing ';' after struct/union/enum declaration, here -> "; - err_str += ln; - } - } - - if (ln.find(';') != std::string::npos && - ln.find("for") == std::string::npos) - { - if (ln.find(';') + 1 != ln.size()) - { - for (int i = 0; i < ln.substr(ln.find(';') + 1).size(); ++i) - { - if ((ln.substr(ln.find(';') + 1)[i] != ' ') || - (ln.substr(ln.find(';') + 1)[i] != '\t')) - { - if (auto err = this->Check(ln.substr(ln.find(';') + 1).c_str(), file); - !err.empty()) - { - err_str += "\nUnexpected text after ';' -> "; - err_str += ln.substr(ln.find(';')); - err_str += err; - } - } - } - } - } - - if (ln.find('(') 
!= std::string::npos) - { - if (ln.find(';') == std::string::npos && - !ParserKit::find_word(ln, "|") && - !ParserKit::find_word(ln, "||") && - !ParserKit::find_word(ln, "&") && - !ParserKit::find_word(ln, "&&") && - !ParserKit::find_word(ln, "~")) - { - bool found_func = false; - size_t i = ln.find('('); - std::vector opens; - std::vector closes; - - for (; i < ln.size(); ++i) - { - if (ln[i] == ')') - { - closes.push_back(1); - } - - if (ln[i] == '(') - { - opens.push_back(1); - } - } - - if (closes.size() != opens.size()) - err_str += "Unterminated (), here -> " + ln; - - bool space_found = false; - - for (int i = 0; i < ln.size(); ++i) - { - if (ln[i] == ')' && - !space_found) - { - space_found = true; - continue; - } - - if (space_found) - { - if (ln[i] == ' ' && - isalnum(ln[i+1])) - { - err_str += "\nBad function format here -> "; - err_str += ln; - } - } - } - } - - if (ln.find('(') < 1) - { - err_str += "\nMissing identifier before '(' here -> "; - err_str += ln; - } - else - { - if (type_not_found && - ln.find(';') == std::string::npos && - ln.find("if") == std::string::npos && - ln.find("while") == std::string::npos && - ln.find("for") == std::string::npos && - ln.find("static") == std::string::npos && - ln.find("inline") == std::string::npos && - ln.find("|") == std::string::npos && - ln.find("&") == std::string::npos && - ln.find("(") == std::string::npos && - ln.find(")") == std::string::npos) - { - err_str += "\n Missing ';' or type, here -> "; - err_str += ln; - } - } - - if (ln.find(')') == std::string::npos) - { - err_str += "\nMissing ')', after '(' here -> "; - err_str += ln.substr(ln.find('(')); - } - } - else - { - if (ln.find("for") != std::string::npos || - ln.find("while") != std::string::npos) - { - err_str += "\nMissing '(', after \"for\", here -> "; - err_str += ln; - } - } - - if (ln.find('}') != std::string::npos && - !kInBraces) - { - if (!kInStruct && - ln.find(';') == std::string::npos) - { - err_str += "\nMismatched '}', here -> "; 
- err_str += ln; - } - } - - if (!ln.empty()) - { - if (ln.find(';') == std::string::npos && - ln.find("struct") == std::string::npos && - ln.find("enum") == std::string::npos && - ln.find("union") == std::string::npos && - ln.find("for") == std::string::npos && - ln.find("while") == std::string::npos && - ln.find('{') == std::string::npos && - ln.find('}') == std::string::npos && - ln.find(')') == std::string::npos && - ln.find('(') == std::string::npos && - ln.find(',') == std::string::npos && - ln.find("typedef") == std::string::npos) - { - if (ln.size() <= 2) - return err_str; - - err_str += "\nMissing ';', here -> "; - err_str += ln; - } - } - - return err_str; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * @brief C To Assembly mount-point. - */ - -///////////////////////////////////////////////////////////////////////////////////////// - -class AssemblyMountpointClang final : public CxxKit::AssemblyMountpoint -{ -public: - explicit AssemblyMountpointClang() = default; - ~AssemblyMountpointClang() override = default; - - CXXKIT_COPY_DEFAULT(AssemblyMountpointClang); - - [[maybe_unused]] static Int32 Arch() noexcept { return CxxKit::AssemblyFactory::kArchRISCV; } - - Int32 CompileToFormat(CxxKit::StringView& src, Int32 arch) override - { - if (arch != AssemblyMountpointClang::Arch()) - return -1; - - if (kCompilerBackend == nullptr) - return -1; - - /* @brief copy contents wihtout extension */ - std::string src_file = src.CData(); - std::ifstream src_fp = std::ifstream(src_file, std::ios::in); - std::string dest; - - for (auto& ch : src_file) - { - if (ch == '.') - { - break; - } - - dest += ch; - } - - /* According to pef abi. 
*/ - dest += kAsmFileExt64x0; - - kState.fOutputAssembly = std::make_unique(dest); - - auto fmt = CxxKit::current_date(); - - (*kState.fOutputAssembly) << "# Path: " << src_file << "\n"; - (*kState.fOutputAssembly) << "# Language: MP-UX Assembly\n"; - (*kState.fOutputAssembly) << "# Build Date: " << fmt << "\n\n"; - - ParserKit::SyntaxLeafList syntax; - - kState.fSyntaxTreeList.push_back(syntax); - kState.fSyntaxTree = &kState.fSyntaxTreeList[kState.fSyntaxTreeList.size() - 1]; - - std::string line_src; - - while (std::getline(src_fp, line_src)) - { - if (auto err = kCompilerBackend->Check(line_src.c_str(), src.CData()); - err.empty()) - { - kCompilerBackend->Compile(line_src.c_str(), src.CData()); - } - else - { - detail::print_error(err, src.CData()); - } - } - - if (kAcceptableErrors > 0) - return -1; - - std::vector keywords = { "ldw", "stw", "lda", "sta", "add", "dec", "mv"}; - - for (auto& leaf : kState.fSyntaxTree->fLeafList) - { - for (auto& keyword : keywords) - { - if (ParserKit::find_word(leaf.fUserValue, keyword)) - { - std::size_t cnt = 0UL; - - for (auto & reg : kState.kStackFrame) - { - std::string needle; - - for (size_t i = 0; i < reg.fName.size(); i++) - { - if (reg.fName[i] == ' ') - { - ++i; - - for (; i < reg.fName.size(); i++) - { - if (reg.fName[i] == ',') - { - break; - } - - if (reg.fName[i] == ' ') - continue; - - needle += reg.fName[i]; - } - - break; - } - } - - if (ParserKit::find_word(leaf.fUserValue, needle)) - { - leaf.fUserValue.replace(leaf.fUserValue.find(needle), - needle.size(), reg.fRegister); - - if (leaf.fUserValue.find("__import") != std::string::npos) - { - if (leaf.fUserValue.find("__import") < leaf.fUserValue.find(needle)) - { - leaf.fUserValue.erase(leaf.fUserValue.find("__import"), strlen("__import")); - } - } - - ++cnt; - } - } - - if (cnt > 1 && keyword != "mv" && keyword != "add" && keyword != "dec") - { - leaf.fUserValue.replace(leaf.fUserValue.find(keyword), keyword.size(), "mv"); - } - } - } - } - - for (auto& 
leaf : kState.fSyntaxTree->fLeafList) - { - (*kState.fOutputAssembly) << leaf.fUserValue; - } - - kState.fSyntaxTree = nullptr; - - kState.fOutputAssembly->flush(); - kState.fOutputAssembly.reset(); - - return kOk; - } - -}; - -///////////////////////////////////////////////////////////////////////////////////////// - -#define kPrintF printf -#define kSplashCxx() kPrintF(kWhite "%s\n", "cc, v1.13, (c) Western Company") - -static void cc_print_help() -{ - kSplashCxx(); - kPrintF(kWhite "--asm={MACHINE}: %s\n", "Compile to a specific assembler syntax. (masm)"); - kPrintF(kWhite "--compiler={COMPILER}: %s\n", "Select compiler engine (builtin -> dolvik)."); -} - -///////////////////////////////////////////////////////////////////////////////////////// - -#define kExt ".c" - -int main(int argc, char** argv) -{ - kCompilerTypes.push_back({ .fName = "void", .fValue = "void" }); - kCompilerTypes.push_back({ .fName = "char", .fValue = "byte" }); - kCompilerTypes.push_back({ .fName = "short", .fValue = "hword" }); - kCompilerTypes.push_back({ .fName = "int", .fValue = "dword" }); - kCompilerTypes.push_back({ .fName = "long", .fValue = "qword" }); - - bool skip = false; - - for (auto index = 1UL; index < argc; ++index) - { - if (skip) - { - skip = false; - continue; - } - - if (argv[index][0] == '-') - { - if (strcmp(argv[index], "-v") == 0 || - strcmp(argv[index], "--version") == 0) - { - kSplashCxx(); - return kOk; - } - - if (strcmp(argv[index], "-verbose") == 0) - { - kState.kVerbose = true; - - continue; - } - - if (strcmp(argv[index], "-h") == 0 || - strcmp(argv[index], "--help") == 0) - { - cc_print_help(); - - return kOk; - } - - if (strcmp(argv[index], "--dialect") == 0) - { - if (kCompilerBackend) - std::cout << kCompilerBackend->Language() << "\n"; - - return kOk; - } - - if (strcmp(argv[index], "--asm=masm") == 0) - { - delete kFactory.Unmount(); - - kFactory.Mount(new AssemblyMountpointClang()); - kMachine = CxxKit::AssemblyFactory::kArchRISCV; - - continue; - } 
- - if (strcmp(argv[index], "--compiler=dolvik") == 0) - { - if (!kCompilerBackend) - kCompilerBackend = new CompilerBackendClang(); - - continue; - } - - if (strcmp(argv[index], "-fmax-exceptions") == 0) - { - try - { - kErrorLimit = std::strtol(argv[index + 1], nullptr, 10); - } - // catch anything here - catch (...) - { - kErrorLimit = 0; - } - - skip = true; - - continue; - } - - std::string err = "Unknown command: "; - err += argv[index]; - - detail::print_error(err, "cc"); - - continue; - } - - kFileList.emplace_back(argv[index]); - - CxxKit::StringView srcFile = CxxKit::StringBuilder::Construct(argv[index]); - - if (strstr(argv[index], kExt) == nullptr) - { - if (kState.kVerbose) - { - std::cerr << argv[index] << " is not a valid C line_src.\n"; - } - - return -1; - } - - if (kFactory.Compile(srcFile, kMachine) != kOk) - return -1; - } - - return kOk; -} diff --git a/CompilerDriver/ccplus.cc b/CompilerDriver/ccplus.cc new file mode 100644 index 0000000..f9f94e4 --- /dev/null +++ b/CompilerDriver/ccplus.cc @@ -0,0 +1,640 @@ +/* + * ======================================================== + * + * ccplus + * Copyright Western Company, all rights reserved. + * + * ======================================================== + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define kOk 0 + +/* Western Company C driver */ +/* This is part of MP-UX C SDK. 
*/ +/* (c) Western Company */ + +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +#define kBlank "\e[0;30m" +#define kRed "\e[0;31m" +#define kWhite "\e[0;97m" + +///////////////////////////////////// + +// INTERNAL STUFF OF THE C COMPILER + +///////////////////////////////////// + +namespace detail +{ + struct CompilerRegisterMap + { + std::string fName; + std::string fRegister; + }; + + struct CompilerState + { + std::vector fSyntaxTreeList; + std::vector kStackFrame; + ParserKit::SyntaxLeafList* fSyntaxTree{ nullptr }; + std::unique_ptr fOutputAssembly; + std::string fLastFile; + std::string fLastError; + bool kVerbose; + }; +} + +static detail::CompilerState kState; +static SizeType kErrorLimit = 100; + +static Int32 kAcceptableErrors = 0; + +namespace detail +{ + void print_error(std::string reason, std::string file) noexcept + { + if (reason[0] == '\n') + reason.erase(0, 1); + + if (file.find(".pp") != std::string::npos) + { + file.erase(file.find(".pp"), 3); + } + + if (kState.fLastFile != file) + { + std::cout << kRed << "[ ccplus ] " << kWhite << ((file == "ccplus") ? "internal compiler error " : ("in file, " + file)) << kBlank << std::endl; + std::cout << kRed << "[ ccplus ] " << kWhite << reason << kBlank << std::endl; + + kState.fLastFile = file; + } + else + { + std::cout << kRed << "[ ccplus ] [ " << kState.fLastFile << " ] " << kWhite << reason << kBlank << std::endl; + } + + if (kAcceptableErrors > kErrorLimit) + std::exit(3); + + ++kAcceptableErrors; + } + + struct CompilerType + { + std::string fName; + std::string fValue; + }; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// Target architecture. 
+static int kMachine = 0; + +///////////////////////////////////////// + +// REGISTERS ACCORDING TO USED ASSEMBLER + +///////////////////////////////////////// + +static size_t kRegisterCnt = kAsmRegisterLimit; +static size_t kStartUsable = 1; +static size_t kUsableLimit = 14; +static size_t kRegisterCounter = kStartUsable; +static std::string kRegisterPrefix = kAsmRegisterPrefix; +static std::vector kKeywords; + +///////////////////////////////////////// + +// COMPILER PARSING UTILITIES/STATES. + +///////////////////////////////////////// + +static std::vector kFileList; +static CxxKit::AssemblyFactory kFactory; +static bool kInStruct = false; +static bool kOnWhileLoop = false; +static bool kOnForLoop = false; +static bool kInBraces = false; +static size_t kBracesCount = 0UL; + +/* @brief C compiler backend for Western Company C */ +class CompilerBackendClang final : public ParserKit::CompilerBackend +{ +public: + explicit CompilerBackendClang() = default; + ~CompilerBackendClang() override = default; + + CXXKIT_COPY_DEFAULT(CompilerBackendClang); + + bool Compile(const std::string& text, const char* file) override; + + const char* Language() override { return "Optimized 64x0 C++"; } + +}; + +static CompilerBackendClang* kCompilerBackend = nullptr; +static std::vector kCompilerVariables; +static std::vector kCompilerFunctions; + +namespace detail +{ + union number_cast + { + number_cast(UInt64 raw) + : raw(raw) + {} + + char number[8]; + UInt64 raw; + + }; + + struct ast_interface + { + explicit ast_interface(std::string& value) + : mValue(value) + { + this->_Compile(); + } + + ~ast_interface() = default; + + CXXKIT_COPY_DEFAULT(ast_interface); + + private: + std::string mProcessed; + std::string mValue; + + void _Compile() noexcept + { + if (mValue.empty()) + { + return; + } + + + } + + }; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @name Compile +// @brief Generate MASM from a C source. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +bool CompilerBackendClang::Compile(const std::string& text, const char* file) +{ + if (text.empty()) + return false; + + // if (expr) + // int name = expr; + // expr; + + std::size_t index = 0UL; + + auto syntax_tree = ParserKit::SyntaxLeafList::SyntaxLeaf(); + + syntax_tree.fUserData = text; + kState.fSyntaxTree->fLeafList.push_back(syntax_tree); + + std::string text_cpy = text; + + std::vector> keywords_list; + + for (auto& keyword : kKeywords) + { + while (text_cpy.find(keyword) != std::string::npos) + { + keywords_list.push_back(std::make_pair(keyword, index)); + ++index; + + text_cpy.erase(text_cpy.find(keyword), keyword.size()); + } + } + + // TODO: sort keywords + + for (auto& keyword : keywords_list) + { + syntax_tree.fUserData = keyword.first; + kState.fSyntaxTree->fLeafList.push_back(syntax_tree); + + std::cout << keyword.first << "\n"; + } + + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/** + * @brief C To Assembly mount-point. 
+ */ + +///////////////////////////////////////////////////////////////////////////////////////// + +class AssemblyMountpointClang final : public CxxKit::AssemblyMountpoint +{ +public: + explicit AssemblyMountpointClang() = default; + ~AssemblyMountpointClang() override = default; + + CXXKIT_COPY_DEFAULT(AssemblyMountpointClang); + + [[maybe_unused]] static Int32 Arch() noexcept { return CxxKit::AssemblyFactory::kArchRISCV; } + + Int32 CompileToFormat(CxxKit::StringView& src, Int32 arch) override + { + if (arch != AssemblyMountpointClang::Arch()) + return -1; + + if (kCompilerBackend == nullptr) + return -1; + + /* @brief copy contents wihtout extension */ + std::string src_file = src.CData(); + std::ifstream src_fp = std::ifstream(src_file, std::ios::in); + std::string dest; + + for (auto& ch : src_file) + { + if (ch == '.') + { + break; + } + + dest += ch; + } + + /* According to pef abi. */ + dest += kAsmFileExt64x0; + + kState.fOutputAssembly = std::make_unique(dest); + + auto fmt = CxxKit::current_date(); + + (*kState.fOutputAssembly) << "# Path: " << src_file << "\n"; + (*kState.fOutputAssembly) << "# Language: MP-UX Assembly\n"; + (*kState.fOutputAssembly) << "# Build Date: " << fmt << "\n\n"; + + ParserKit::SyntaxLeafList syntax; + + kState.fSyntaxTreeList.push_back(syntax); + kState.fSyntaxTree = &kState.fSyntaxTreeList[kState.fSyntaxTreeList.size() - 1]; + + std::string source; + + while (std::getline(src_fp, source)) + { + kCompilerBackend->Compile(source.c_str(), src.CData()); + } + + if (kAcceptableErrors > 0) + return -1; + + std::vector lines; + + struct scope_type + { + std::vector vals; + int reg_cnt; + int id; + + bool operator==(const scope_type& typ) { return typ.id == id; } + }; + + std::vector scope; + bool found_type = false; + bool is_pointer = false; + bool found_expr = false; + bool found_func = false; + + for (auto& leaf : kState.fSyntaxTree->fLeafList) + { + if (leaf.fUserData == "{") + { + scope.push_back({}); + } + + if (leaf.fUserData 
== "{") + { + scope.pop_back(); + } + + if (leaf.fUserData == "int" || + leaf.fUserData == "long" || + leaf.fUserData == "unsigned" || + leaf.fUserData == "short" || + leaf.fUserData == "char" || + leaf.fUserData == "struct" || + leaf.fUserData == "class") + { + found_type = true; + } + + if (leaf.fUserData == "(") + { + if (found_type) + { + found_expr = true; + found_type = false; + is_pointer = false; + } + } + + if (leaf.fUserData == ")") + { + if (found_expr) + { + found_expr = false; + is_pointer = false; + } + } + + if (leaf.fUserData == ",") + { + if (is_pointer) + { + is_pointer = false; + } + } + + if (leaf.fUserData == "*") + { + if (found_type && !found_expr) + is_pointer = true; + } + + if (leaf.fUserData == "=") + { + auto& front = scope.front(); + + if (found_type) + { + std::string reg = "r"; + reg += std::to_string(front.reg_cnt); + ++front.reg_cnt; + + leaf.fUserValue = !is_pointer ? "ldw %s, %s1\n" : "lda %s, %s1\n"; + + for (auto& ln : lines) + { + if (ln.find(leaf.fUserData) != std::string::npos && + ln.find(";") != std::string::npos) + { + auto val = ln.substr(ln.find(leaf.fUserData) + leaf.fUserData.size()); + val.erase(val.find(";"), 1); + + leaf.fUserValue.replace(leaf.fUserValue.find("%s1"), strlen("%s1"), val); + } + } + + leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), reg); + } + + is_pointer = false; + found_type = false; + } + + if (leaf.fUserData == "return") + { + leaf.fUserValue = "ldw r19, %s\njlr"; + + if (!lines.empty()) + { + for (auto& ln : lines) + { + if (ln.find(leaf.fUserData) != std::string::npos && + ln.find(";") != std::string::npos) + { + auto val = ln.substr(ln.find(leaf.fUserData) + leaf.fUserData.size()); + val.erase(val.find(";"), 1); + + leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), val); + } + } + } + else + { + leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), "0"); + } + + continue; + } + + lines.push_back(leaf.fUserData); + } + + for (auto& leaf : 
kState.fSyntaxTree->fLeafList) + { + (*kState.fOutputAssembly) << leaf.fUserValue; + } + + kState.fSyntaxTree = nullptr; + + kState.fOutputAssembly->flush(); + kState.fOutputAssembly.reset(); + + return kOk; + } + +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +#define kPrintF printf +#define kSplashCxx() kPrintF(kWhite "%s\n", "ccplus, v1.14, (c) Western Company.") + +static void cxx_print_help() +{ + kSplashCxx(); + kPrintF(kWhite "--asm={MACHINE}: %s\n", "Compile with a specific syntax. (64x0, 32x0)"); + kPrintF(kWhite "--compiler={COMPILER}: %s\n", "Select compiler engine (builtin -> vanhalen++)."); +} + +///////////////////////////////////////////////////////////////////////////////////////// + +#define kExt ".cc" + +int main(int argc, char** argv) +{ + kKeywords.push_back("auto"); + kKeywords.push_back("else"); + kKeywords.push_back("break"); + kKeywords.push_back("switch"); + kKeywords.push_back("enum"); + kKeywords.push_back("register"); + kKeywords.push_back("do"); + kKeywords.push_back("return"); + kKeywords.push_back("if"); + kKeywords.push_back("default"); + kKeywords.push_back("struct"); + kKeywords.push_back("_Packed"); + kKeywords.push_back("extern"); + kKeywords.push_back("volatile"); + kKeywords.push_back("static"); + kKeywords.push_back("for"); + kKeywords.push_back("class"); + kKeywords.push_back("{"); + kKeywords.push_back("}"); + kKeywords.push_back("("); + kKeywords.push_back(")"); + kKeywords.push_back("char"); + kKeywords.push_back("int"); + kKeywords.push_back("short"); + kKeywords.push_back("long"); + kKeywords.push_back("float"); + kKeywords.push_back("double"); + kKeywords.push_back("unsigned"); + kKeywords.push_back("__export__"); + kKeywords.push_back("__packed__"); + kKeywords.push_back("namespace"); + kKeywords.push_back("while"); + kKeywords.push_back("sizeof"); + kKeywords.push_back("private"); + kKeywords.push_back("->"); + kKeywords.push_back("."); + kKeywords.push_back("::"); + 
kKeywords.push_back("*"); + kKeywords.push_back("+"); + kKeywords.push_back("-"); + kKeywords.push_back("/"); + kKeywords.push_back("="); + kKeywords.push_back("=="); + kKeywords.push_back("!="); + kKeywords.push_back(">="); + kKeywords.push_back("<="); + kKeywords.push_back(">"); + kKeywords.push_back("<"); + kKeywords.push_back(":"); + kKeywords.push_back(","); + kKeywords.push_back(";"); + kKeywords.push_back("public"); + kKeywords.push_back("protected"); + + bool skip = false; + + for (auto index = 1UL; index < argc; ++index) + { + if (skip) + { + skip = false; + continue; + } + + if (argv[index][0] == '-') + { + if (strcmp(argv[index], "-v") == 0 || + strcmp(argv[index], "--version") == 0) + { + kSplashCxx(); + return kOk; + } + + if (strcmp(argv[index], "-verbose") == 0) + { + kState.kVerbose = true; + + continue; + } + + if (strcmp(argv[index], "-h") == 0 || + strcmp(argv[index], "--help") == 0) + { + cxx_print_help(); + + return kOk; + } + + if (strcmp(argv[index], "--dialect") == 0) + { + if (kCompilerBackend) + std::cout << kCompilerBackend->Language() << "\n"; + + return kOk; + } + + if (strcmp(argv[index], "--asm=masm") == 0) + { + delete kFactory.Unmount(); + + kFactory.Mount(new AssemblyMountpointClang()); + kMachine = CxxKit::AssemblyFactory::kArchRISCV; + + continue; + } + + if (strcmp(argv[index], "--compiler=vanhalen") == 0) + { + if (!kCompilerBackend) + kCompilerBackend = new CompilerBackendClang(); + + continue; + } + + if (strcmp(argv[index], "-fmax-exceptions") == 0) + { + try + { + kErrorLimit = std::strtol(argv[index + 1], nullptr, 10); + } + // catch anything here + catch (...) 
+ { + kErrorLimit = 0; + } + + skip = true; + + continue; + } + + std::string err = "Unknown command: "; + err += argv[index]; + + detail::print_error(err, "ccplus"); + + continue; + } + + kFileList.emplace_back(argv[index]); + + CxxKit::StringView srcFile = CxxKit::StringBuilder::Construct(argv[index]); + + if (strstr(argv[index], kExt) == nullptr) + { + if (kState.kVerbose) + { + std::cerr << argv[index] << " is not a valid C source.\n"; + } + + return -1; + } + + if (kFactory.Compile(srcFile, kMachine) != kOk) + return -1; + } + + return kOk; +} diff --git a/CompilerDriver/ccplus.cxx b/CompilerDriver/ccplus.cxx deleted file mode 100644 index d9ef3fe..0000000 --- a/CompilerDriver/ccplus.cxx +++ /dev/null @@ -1,640 +0,0 @@ -/* - * ======================================================== - * - * ccplus - * Copyright Western Company, all rights reserved. - * - * ======================================================== - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define kOk 0 - -/* Western Company C driver */ -/* This is part of MP-UX C SDK. 
*/ -/* (c) Western Company */ - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" - -///////////////////////////////////// - -// INTERNAL STUFF OF THE C COMPILER - -///////////////////////////////////// - -namespace detail -{ - struct CompilerRegisterMap - { - std::string fName; - std::string fRegister; - }; - - struct CompilerState - { - std::vector fSyntaxTreeList; - std::vector kStackFrame; - ParserKit::SyntaxLeafList* fSyntaxTree{ nullptr }; - std::unique_ptr fOutputAssembly; - std::string fLastFile; - std::string fLastError; - bool kVerbose; - }; -} - -static detail::CompilerState kState; -static SizeType kErrorLimit = 100; - -static Int32 kAcceptableErrors = 0; - -namespace detail -{ - void print_error(std::string reason, std::string file) noexcept - { - if (reason[0] == '\n') - reason.erase(0, 1); - - if (file.find(".pp") != std::string::npos) - { - file.erase(file.find(".pp"), 3); - } - - if (kState.fLastFile != file) - { - std::cout << kRed << "[ ccplus ] " << kWhite << ((file == "ccplus") ? "internal compiler error " : ("in file, " + file)) << kBlank << std::endl; - std::cout << kRed << "[ ccplus ] " << kWhite << reason << kBlank << std::endl; - - kState.fLastFile = file; - } - else - { - std::cout << kRed << "[ ccplus ] [ " << kState.fLastFile << " ] " << kWhite << reason << kBlank << std::endl; - } - - if (kAcceptableErrors > kErrorLimit) - std::exit(3); - - ++kAcceptableErrors; - } - - struct CompilerType - { - std::string fName; - std::string fValue; - }; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// Target architecture. 
-static int kMachine = 0; - -///////////////////////////////////////// - -// REGISTERS ACCORDING TO USED ASSEMBLER - -///////////////////////////////////////// - -static size_t kRegisterCnt = kAsmRegisterLimit; -static size_t kStartUsable = 1; -static size_t kUsableLimit = 14; -static size_t kRegisterCounter = kStartUsable; -static std::string kRegisterPrefix = kAsmRegisterPrefix; -static std::vector kKeywords; - -///////////////////////////////////////// - -// COMPILER PARSING UTILITIES/STATES. - -///////////////////////////////////////// - -static std::vector kFileList; -static CxxKit::AssemblyFactory kFactory; -static bool kInStruct = false; -static bool kOnWhileLoop = false; -static bool kOnForLoop = false; -static bool kInBraces = false; -static size_t kBracesCount = 0UL; - -/* @brief C compiler backend for Western Company C */ -class CompilerBackendClang final : public ParserKit::CompilerBackend -{ -public: - explicit CompilerBackendClang() = default; - ~CompilerBackendClang() override = default; - - CXXKIT_COPY_DEFAULT(CompilerBackendClang); - - bool Compile(const std::string& text, const char* file) override; - - const char* Language() override { return "Optimized 64x0 C++"; } - -}; - -static CompilerBackendClang* kCompilerBackend = nullptr; -static std::vector kCompilerVariables; -static std::vector kCompilerFunctions; - -namespace detail -{ - union number_cast - { - number_cast(UInt64 raw) - : raw(raw) - {} - - char number[8]; - UInt64 raw; - - }; - - struct ast_interface - { - explicit ast_interface(std::string& value) - : mValue(value) - { - this->_Compile(); - } - - ~ast_interface() = default; - - CXXKIT_COPY_DEFAULT(ast_interface); - - private: - std::string mProcessed; - std::string mValue; - - void _Compile() noexcept - { - if (mValue.empty()) - { - return; - } - - - } - - }; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name Compile -// @brief Generate MASM from a C source. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerBackendClang::Compile(const std::string& text, const char* file) -{ - if (text.empty()) - return false; - - // if (expr) - // int name = expr; - // expr; - - std::size_t index = 0UL; - - auto syntax_tree = ParserKit::SyntaxLeafList::SyntaxLeaf(); - - syntax_tree.fUserData = text; - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - std::string text_cpy = text; - - std::vector> keywords_list; - - for (auto& keyword : kKeywords) - { - while (text_cpy.find(keyword) != std::string::npos) - { - keywords_list.push_back(std::make_pair(keyword, index)); - ++index; - - text_cpy.erase(text_cpy.find(keyword), keyword.size()); - } - } - - // TODO: sort keywords - - for (auto& keyword : keywords_list) - { - syntax_tree.fUserData = keyword.first; - kState.fSyntaxTree->fLeafList.push_back(syntax_tree); - - std::cout << keyword.first << "\n"; - } - - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * @brief C To Assembly mount-point. 
- */ - -///////////////////////////////////////////////////////////////////////////////////////// - -class AssemblyMountpointClang final : public CxxKit::AssemblyMountpoint -{ -public: - explicit AssemblyMountpointClang() = default; - ~AssemblyMountpointClang() override = default; - - CXXKIT_COPY_DEFAULT(AssemblyMountpointClang); - - [[maybe_unused]] static Int32 Arch() noexcept { return CxxKit::AssemblyFactory::kArchRISCV; } - - Int32 CompileToFormat(CxxKit::StringView& src, Int32 arch) override - { - if (arch != AssemblyMountpointClang::Arch()) - return -1; - - if (kCompilerBackend == nullptr) - return -1; - - /* @brief copy contents wihtout extension */ - std::string src_file = src.CData(); - std::ifstream src_fp = std::ifstream(src_file, std::ios::in); - std::string dest; - - for (auto& ch : src_file) - { - if (ch == '.') - { - break; - } - - dest += ch; - } - - /* According to pef abi. */ - dest += kAsmFileExt64x0; - - kState.fOutputAssembly = std::make_unique(dest); - - auto fmt = CxxKit::current_date(); - - (*kState.fOutputAssembly) << "# Path: " << src_file << "\n"; - (*kState.fOutputAssembly) << "# Language: MP-UX Assembly\n"; - (*kState.fOutputAssembly) << "# Build Date: " << fmt << "\n\n"; - - ParserKit::SyntaxLeafList syntax; - - kState.fSyntaxTreeList.push_back(syntax); - kState.fSyntaxTree = &kState.fSyntaxTreeList[kState.fSyntaxTreeList.size() - 1]; - - std::string source; - - while (std::getline(src_fp, source)) - { - kCompilerBackend->Compile(source.c_str(), src.CData()); - } - - if (kAcceptableErrors > 0) - return -1; - - std::vector lines; - - struct scope_type - { - std::vector vals; - int reg_cnt; - int id; - - bool operator==(const scope_type& typ) { return typ.id == id; } - }; - - std::vector scope; - bool found_type = false; - bool is_pointer = false; - bool found_expr = false; - bool found_func = false; - - for (auto& leaf : kState.fSyntaxTree->fLeafList) - { - if (leaf.fUserData == "{") - { - scope.push_back({}); - } - - if (leaf.fUserData 
== "{") - { - scope.pop_back(); - } - - if (leaf.fUserData == "int" || - leaf.fUserData == "long" || - leaf.fUserData == "unsigned" || - leaf.fUserData == "short" || - leaf.fUserData == "char" || - leaf.fUserData == "struct" || - leaf.fUserData == "class") - { - found_type = true; - } - - if (leaf.fUserData == "(") - { - if (found_type) - { - found_expr = true; - found_type = false; - is_pointer = false; - } - } - - if (leaf.fUserData == ")") - { - if (found_expr) - { - found_expr = false; - is_pointer = false; - } - } - - if (leaf.fUserData == ",") - { - if (is_pointer) - { - is_pointer = false; - } - } - - if (leaf.fUserData == "*") - { - if (found_type && !found_expr) - is_pointer = true; - } - - if (leaf.fUserData == "=") - { - auto& front = scope.front(); - - if (found_type) - { - std::string reg = "r"; - reg += std::to_string(front.reg_cnt); - ++front.reg_cnt; - - leaf.fUserValue = !is_pointer ? "ldw %s, %s1\n" : "lda %s, %s1\n"; - - for (auto& ln : lines) - { - if (ln.find(leaf.fUserData) != std::string::npos && - ln.find(";") != std::string::npos) - { - auto val = ln.substr(ln.find(leaf.fUserData) + leaf.fUserData.size()); - val.erase(val.find(";"), 1); - - leaf.fUserValue.replace(leaf.fUserValue.find("%s1"), strlen("%s1"), val); - } - } - - leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), reg); - } - - is_pointer = false; - found_type = false; - } - - if (leaf.fUserData == "return") - { - leaf.fUserValue = "ldw r19, %s\njlr"; - - if (!lines.empty()) - { - for (auto& ln : lines) - { - if (ln.find(leaf.fUserData) != std::string::npos && - ln.find(";") != std::string::npos) - { - auto val = ln.substr(ln.find(leaf.fUserData) + leaf.fUserData.size()); - val.erase(val.find(";"), 1); - - leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), val); - } - } - } - else - { - leaf.fUserValue.replace(leaf.fUserValue.find("%s"), strlen("%s"), "0"); - } - - continue; - } - - lines.push_back(leaf.fUserData); - } - - for (auto& leaf : 
kState.fSyntaxTree->fLeafList) - { - (*kState.fOutputAssembly) << leaf.fUserValue; - } - - kState.fSyntaxTree = nullptr; - - kState.fOutputAssembly->flush(); - kState.fOutputAssembly.reset(); - - return kOk; - } - -}; - -///////////////////////////////////////////////////////////////////////////////////////// - -#define kPrintF printf -#define kSplashCxx() kPrintF(kWhite "%s\n", "ccplus, v1.13, (c) Western Company.") - -static void cxx_print_help() -{ - kSplashCxx(); - kPrintF(kWhite "--asm={MACHINE}: %s\n", "Compile to a specific assembler syntax. (masm)"); - kPrintF(kWhite "--compiler={COMPILER}: %s\n", "Select compiler engine (builtin -> vanhalen++)."); -} - -///////////////////////////////////////////////////////////////////////////////////////// - -#define kExt ".c" - -int main(int argc, char** argv) -{ - kKeywords.push_back("auto"); - kKeywords.push_back("else"); - kKeywords.push_back("break"); - kKeywords.push_back("switch"); - kKeywords.push_back("enum"); - kKeywords.push_back("register"); - kKeywords.push_back("do"); - kKeywords.push_back("return"); - kKeywords.push_back("if"); - kKeywords.push_back("default"); - kKeywords.push_back("struct"); - kKeywords.push_back("_Packed"); - kKeywords.push_back("extern"); - kKeywords.push_back("volatile"); - kKeywords.push_back("static"); - kKeywords.push_back("for"); - kKeywords.push_back("class"); - kKeywords.push_back("{"); - kKeywords.push_back("}"); - kKeywords.push_back("("); - kKeywords.push_back(")"); - kKeywords.push_back("char"); - kKeywords.push_back("int"); - kKeywords.push_back("short"); - kKeywords.push_back("long"); - kKeywords.push_back("float"); - kKeywords.push_back("double"); - kKeywords.push_back("unsigned"); - kKeywords.push_back("__export__"); - kKeywords.push_back("__packed__"); - kKeywords.push_back("namespace"); - kKeywords.push_back("while"); - kKeywords.push_back("sizeof"); - kKeywords.push_back("private"); - kKeywords.push_back("->"); - kKeywords.push_back("."); - kKeywords.push_back("::"); 
- kKeywords.push_back("*"); - kKeywords.push_back("+"); - kKeywords.push_back("-"); - kKeywords.push_back("/"); - kKeywords.push_back("="); - kKeywords.push_back("=="); - kKeywords.push_back("!="); - kKeywords.push_back(">="); - kKeywords.push_back("<="); - kKeywords.push_back(">"); - kKeywords.push_back("<"); - kKeywords.push_back(":"); - kKeywords.push_back(","); - kKeywords.push_back(";"); - kKeywords.push_back("public"); - kKeywords.push_back("protected"); - - bool skip = false; - - for (auto index = 1UL; index < argc; ++index) - { - if (skip) - { - skip = false; - continue; - } - - if (argv[index][0] == '-') - { - if (strcmp(argv[index], "-v") == 0 || - strcmp(argv[index], "--version") == 0) - { - kSplashCxx(); - return kOk; - } - - if (strcmp(argv[index], "-verbose") == 0) - { - kState.kVerbose = true; - - continue; - } - - if (strcmp(argv[index], "-h") == 0 || - strcmp(argv[index], "--help") == 0) - { - cxx_print_help(); - - return kOk; - } - - if (strcmp(argv[index], "--dialect") == 0) - { - if (kCompilerBackend) - std::cout << kCompilerBackend->Language() << "\n"; - - return kOk; - } - - if (strcmp(argv[index], "--asm=masm") == 0) - { - delete kFactory.Unmount(); - - kFactory.Mount(new AssemblyMountpointClang()); - kMachine = CxxKit::AssemblyFactory::kArchRISCV; - - continue; - } - - if (strcmp(argv[index], "--compiler=vanhalen") == 0) - { - if (!kCompilerBackend) - kCompilerBackend = new CompilerBackendClang(); - - continue; - } - - if (strcmp(argv[index], "-fmax-exceptions") == 0) - { - try - { - kErrorLimit = std::strtol(argv[index + 1], nullptr, 10); - } - // catch anything here - catch (...) 
- { - kErrorLimit = 0; - } - - skip = true; - - continue; - } - - std::string err = "Unknown command: "; - err += argv[index]; - - detail::print_error(err, "ccplus"); - - continue; - } - - kFileList.emplace_back(argv[index]); - - CxxKit::StringView srcFile = CxxKit::StringBuilder::Construct(argv[index]); - - if (strstr(argv[index], kExt) == nullptr) - { - if (kState.kVerbose) - { - std::cerr << argv[index] << " is not a valid C source.\n"; - } - - return -1; - } - - if (kFactory.Compile(srcFile, kMachine) != kOk) - return -1; - } - - return kOk; -} diff --git a/CompilerDriver/cpp.cc b/CompilerDriver/cpp.cc new file mode 100644 index 0000000..6986e27 --- /dev/null +++ b/CompilerDriver/cpp.cc @@ -0,0 +1,1024 @@ +/* + * ======================================================== + * + * cpp + * Copyright Western Company, all rights reserved. + * + * ======================================================== + */ + +#include +#include +#include +#include +#include + +typedef Int32(*cpp_parser_fn_t)(std::string& line, std::ifstream& hdr_file, std::ofstream& pp_out); + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Preprocessor internal types. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +namespace details +{ + enum + { + kEqual, + kGreaterEqThan, + kLesserEqThan, + kGreaterThan, + kLesserThan, + kNotEqual, + }; + + struct cpp_macro_condition final + { + int32_t fType; + std::string fTypeName; + }; + + struct cpp_macro final + { + std::vector fArgs; + std::string fName; + std::string fValue; + }; + + class cpp_pragma final + { + public: + explicit cpp_pragma() = default; + ~cpp_pragma() = default; + + CXXKIT_COPY_DEFAULT(cpp_pragma); + + std::string fMacroName{ "" }; + Int32(*fParse)(std::string& line, std::ifstream& hdr_file, std::ofstream& pp_out); + + }; +} + +static std::vector kFiles; +static std::vector kMacros; +static std::vector kIncludes; + +static std::string kWoringDir; + +static std::vector kKeywords = { + "include", + "if", + "pragma", + "define", + "elif", + "ifdef", + "ifndef", + "else", + "warning", + "error" +}; + +#define kKeywordCxxCnt kKeywords.size() + +///////////////////////////////////////////////////////////////////////////////////////// + +// @name cpp_parse_if_condition +// @brief parse #if condition + +///////////////////////////////////////////////////////////////////////////////////////// + +int32_t cpp_parse_if_condition(details::cpp_macro_condition& cond, + details::cpp_macro& macro, + bool& inactive_code, bool& defined, + std::string& macro_str) +{ + if (cond.fType == details::kEqual) + { + auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); + + if (substr_macro.find(macro.fValue) != std::string::npos) + { + if (macro.fValue == "0") + { + defined = false; + inactive_code = true; + + return 1; + } + + defined = true; + inactive_code = false; + + return 1; + } + } + else if (cond.fType == details::kNotEqual) + { + auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); + + if (substr_macro.find(macro.fName) != std::string::npos) + { + if 
(substr_macro.find(macro.fValue) != std::string::npos) + { + defined = false; + inactive_code = true; + + return 1; + } + + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); + + std::string number; + + for (auto& macro : kMacros) + { + if (substr_macro.find(macro.fName) != std::string::npos) + { + for (size_t i = 0; i < macro.fName.size(); ++i) + { + if (isdigit(macro.fValue[i])) + { + number += macro.fValue[i]; + } + else + { + number.clear(); + break; + } + } + + break; + } + } + + size_t y = 2; + + /* last try */ + for (; y < macro_str.size(); y++) + { + if (isdigit(macro_str[y])) + { + for (size_t x = y; x < macro_str.size(); x++) + { + if (macro_str[x] == ' ') + break; + + number += macro_str[x]; + } + + break; + } + } + + size_t rhs = atol(macro.fValue.c_str()); + size_t lhs = atol(number.c_str()); + + if (lhs == 0) + { + number.clear(); + ++y; + + for (; y < macro_str.size(); y++) + { + if (isdigit(macro_str[y])) + { + for (size_t x = y; x < macro_str.size(); x++) + { + if (macro_str[x] == ' ') + break; + + number += macro_str[x]; + } + + break; + } + } + + lhs = atol(number.c_str()); + } + + if (cond.fType == details::kGreaterThan) + { + if (lhs < rhs) + { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == details::kGreaterEqThan) + { + if (lhs <= rhs) + { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == details::kLesserEqThan) + { + if (lhs >= rhs) + { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == details::kLesserThan) + { + if (lhs > rhs) + { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + return 0; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief stores every included file here. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +std::vector kAllIncludes; + +///////////////////////////////////////////////////////////////////////////////////////// + +// @name cpp_parse_file +// @brief parse file to preprocess it. + +///////////////////////////////////////////////////////////////////////////////////////// + +void cpp_parse_file(std::ifstream& hdr_file, std::ofstream& pp_out) +{ + std::string hdr_line; + std::string line_after_include; + + bool inactive_code = false; + bool comment = false; + bool defined = false; + bool else_branch = false; + + try + { + while (std::getline(hdr_file, hdr_line)) + { + // make cc, ccplus life easier + if (hdr_line.find("//") != std::string::npos) + { + hdr_line.erase(hdr_line.find("//")); + } + + if (hdr_line[0] == '#' && + hdr_line.find("endif") != std::string::npos) + { + if (!defined && + inactive_code) + { + inactive_code = false; + defined = false; + + continue; + } + + continue; + } + + if (!defined && + inactive_code) + { + continue; + } + + if (defined && + inactive_code) + { + continue; + } + + for (auto macro : kMacros) + { + if (ParserKit::find_word(hdr_line, macro.fName) && + hdr_line.find("#define") == std::string::npos) + { + hdr_line = hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), macro.fValue); + } + } + + if (hdr_line[0] == '#' && + hdr_line.find("define") != std::string::npos) + { + auto line_after_define = hdr_line.substr(hdr_line.find("define") + strlen("define") + 1); + + std::string macro_value; + std::string macro_key; + + std::size_t pos = 0UL; + + std::vector args; + bool on_args = false; + + for (auto& ch : line_after_define) + { + ++pos; + + if (ch == '(') + { + on_args = true; + continue; + } + + if (ch == ')') + { + on_args = false; + continue; + } + + if (ch == '\\') + continue; + + if (on_args) + continue; + + if (ch == ' ') + { + for (size_t i = pos; i < line_after_define.size(); i++) + { + macro_value += 
line_after_define[i]; + } + + break; + } + + macro_key += ch; + } + + for (auto& ch : line_after_define) + { + if (ch == '(') + { + std::string arg; + + for (size_t i = pos+1; i < line_after_define.size(); i++) + { + if (line_after_define[i] == ')') + break; + + if (line_after_define[i] == ' ') + continue; + + if (line_after_define[i] == ',') + { + args.push_back(arg); + arg.clear(); + + continue; + } + + arg += line_after_define[i]; + } + + break; + } + } + + details::cpp_macro macro; + + macro.fArgs = args; + macro.fName = macro_key; + macro.fValue = macro_value; + + kMacros.emplace_back(macro); + + continue; + } + + if (hdr_line[0] != '#') + { + if (inactive_code) + { + continue; + } + + for (auto& macro : kMacros) + { + if (hdr_line.find(macro.fName) != std::string::npos) + { + std::vector arg_values; + + if (macro.fArgs.size() > 0) + { + for (size_t i = 0; i < hdr_line.size(); ++i) + { + if (hdr_line[i] == '(') + { + std::string tmp_arg; + + for (size_t x = i; x < hdr_line.size(); x++) + { + if (hdr_line[x] == ')') + break; + + if (hdr_line[x] == ' ') + continue; + + if (hdr_line[i] == '\\') + continue; + + if (hdr_line[x] == ',') + { + arg_values.push_back(tmp_arg); + tmp_arg.clear(); + continue; + } + + tmp_arg += hdr_line[x]; + } + + break; + } + } + + std::string symbol; + + for (size_t i = 0; i < macro.fValue.size(); i++) + { + if (macro.fValue[i] == '(') + break; + + if (macro.fValue[i] == '\\') + continue; + + symbol += macro.fValue[i]; + } + + hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), symbol); + + size_t x_arg_indx = 0; + + for (size_t i = hdr_line.find(macro.fValue); i < hdr_line.size(); ++i) + { + if (hdr_line.find(macro.fArgs[x_arg_indx]) == i) + { + hdr_line.replace(i, macro.fArgs[x_arg_indx].size(), arg_values[x_arg_indx]); + ++x_arg_indx; + } + } + + } + else + { + std::string symbol; + + for (size_t i = 0; i < macro.fValue.size(); i++) + { + if (macro.fValue[i] == ' ') + continue; + + if (macro.fValue[i] == '\\') + 
continue; + + symbol += macro.fValue[i]; + } + + hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), symbol); + } + + break; + } + } + + pp_out << hdr_line << std::endl; + + continue; + } + + if (hdr_line[0] == '#' && + hdr_line.find("ifndef") != std::string::npos) + { + auto line_after_ifndef = hdr_line.substr(hdr_line.find("ifndef") + strlen("ifndef") + 1); + std::string macro; + + for (auto& ch : line_after_ifndef) + { + if (ch == ' ') + { + break; + } + + macro += ch; + } + + if (macro == "0") + { + defined = true; + inactive_code = false; + continue; + } + + if (macro == "1") + { + defined = false; + inactive_code = true; + + continue; + } + + bool found = false; + + defined = true; + inactive_code = false; + + for (auto& macro_ref : kMacros) + { + if (hdr_line.find(macro_ref.fName) != std::string::npos) + { + found = true; + break; + } + } + + if (found) + { + defined = false; + inactive_code = true; + + continue; + } + } + else if (hdr_line[0] == '#' && + hdr_line.find("else") != std::string::npos) + { + if (!defined && + inactive_code) + { + inactive_code = false; + defined = true; + + continue; + } + else + { + defined = false; + inactive_code = true; + + continue; + } + + else_branch = true; + } + else if (hdr_line[0] == '#' && + hdr_line.find("ifdef") != std::string::npos) + { + auto line_after_ifdef = hdr_line.substr(hdr_line.find("ifdef") + strlen("ifdef") + 1); + std::string macro; + + for (auto& ch : line_after_ifdef) + { + if (ch == ' ') + { + break; + } + + macro += ch; + } + + if (macro == "0") + { + defined = false; + inactive_code = true; + + continue; + } + + if (macro == "1") + { + defined = true; + inactive_code = false; + + continue; + } + + defined = false; + inactive_code = true; + + for (auto& macro_ref : kMacros) + { + if (hdr_line.find(macro_ref.fName) != std::string::npos) + { + defined = true; + inactive_code = false; + + break; + } + } + } + else if (hdr_line[0] == '#' && + hdr_line.find("pragma") != std::string::npos) + 
{ + line_after_include = hdr_line.substr(hdr_line.find("pragma once")); + + // search for this file + auto it = std::find(kAllIncludes.cbegin(), + kAllIncludes.cend(), line_after_include); + + if (it == kAllIncludes.cend()) + { + goto kIncludeFile; + } + } + else if (hdr_line[0] == '#' && + hdr_line.find("if") != std::string::npos) + { + inactive_code = true; + + std::vector cpp_macro_condition_list = { + { + .fType = details::kEqual, + .fTypeName = "==", + }, + { + .fType = details::kNotEqual, + .fTypeName = "!=", + }, + { + .fType = details::kLesserThan, + .fTypeName = "<", + }, + { + .fType = details::kGreaterThan, + .fTypeName = ">", + }, + { + .fType = details::kLesserEqThan, + .fTypeName = "<=", + }, + { + .fType = details::kGreaterEqThan, + .fTypeName = ">=", + }, + }; + + int32_t good_to_go = 0; + + for (auto& macro_condition : cpp_macro_condition_list) + { + if (hdr_line.find(macro_condition.fTypeName) != std::string::npos) + { + for (auto& found_macro : kMacros) + { + if (hdr_line.find(found_macro.fName) != std::string::npos) + { + good_to_go = cpp_parse_if_condition(macro_condition, found_macro, + inactive_code, defined, + hdr_line); + + break; + } + } + } + } + + if (good_to_go) + continue; + + auto line_after_if = hdr_line.substr(hdr_line.find("if") + strlen("if") + 1); + std::string macro; + + for (auto& ch : line_after_if) + { + if (ch == ' ') + { + break; + } + + macro += ch; + } + + if (macro == "0") + { + defined = false; + inactive_code = true; + continue; + } + + if (macro == "1") + { + defined = true; + inactive_code = false; + + continue; + } + + // last try, is it defined to be one? 
+ for (auto& macro_ref : kMacros) + { + if (macro_ref.fName.find(macro) != std::string::npos && + macro_ref.fValue == "1") + { + inactive_code = false; + defined = true; + + break; + } + } + } + else if (hdr_line[0] == '#' && + hdr_line.find("warning") != std::string::npos) + { + auto line_after_warning = hdr_line.substr(hdr_line.find("warning") + strlen("warning") + 1); + std::string message; + + for (auto& ch : line_after_warning) + { + if (ch == '\r' || + ch == '\n') + { + break; + } + + message += ch; + } + + std::cout << "Warning: " << message << std::endl; + } + else if (hdr_line[0] == '#' && + hdr_line.find("error") != std::string::npos) + { + auto line_after_warning = hdr_line.substr(hdr_line.find("error") + strlen("error") + 1); + std::string message; + + for (auto& ch : line_after_warning) + { + if (ch == '\r' || + ch == '\n') + { + break; + } + + message += ch; + } + + throw std::runtime_error("Error: " + message); + } + else if (hdr_line[0] == '#' && + hdr_line.find("include") != std::string::npos) + { + line_after_include = hdr_line.substr(hdr_line.find("include")); + +kIncludeFile: + auto it = std::find(kAllIncludes.cbegin(), + kAllIncludes.cend(), line_after_include); + + if (it != kAllIncludes.cend()) + { + continue; + } + + std::string path; + + kAllIncludes.push_back(line_after_include); + + bool enable = false; + bool not_local = false; + + for (auto& ch : line_after_include) + { + if (ch == ' ') + continue; + + if (ch == '<') + not_local = true; + + if (ch == '\"' || + ch == '<') + { + enable = true; + continue; + } + + if (enable) + { + if (ch == '>' || + ch == '\"') + break; + + path += ch; + } + } + + if (not_local) + { + bool open = false; + + for (auto& include : kIncludes) + { + std::ifstream header(include + '/' + path); + + if (!header.is_open()) + continue; + + open = true; + + cpp_parse_file(header, pp_out); + + break; + } + + if (open == false) + { + throw std::runtime_error("cpp: no such include file: " + path); + } + } + else + { + 
std::ifstream header(kWoringDir + path); + + if (!header.is_open()) + throw std::runtime_error("cpp: no such include file: " + path); + + cpp_parse_file(header, pp_out); + } + } + else + { + std::cerr << ("cpp: unknown pre-processor directive, " + hdr_line) << "\n"; + continue; + } + } + } + catch (std::out_of_range& oor) + { + return; + } +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief main entrypoint of app. + +///////////////////////////////////////////////////////////////////////////////////////// + +int main(int argc, char** argv) +{ + try + { + bool skip = false; + bool double_skip = false; + + details::cpp_macro macro_1; + macro_1.fName = "__true"; + macro_1.fValue = "1"; + + kMacros.push_back(macro_1); + + details::cpp_macro macro_0; + macro_0.fName = "__false"; + macro_0.fValue = "0"; + + kMacros.push_back(macro_0); + + for (auto index = 1UL; index < argc; ++index) + { + if (skip) + { + skip = false; + continue; + } + + if (double_skip) + { + ++index; + double_skip = false; + continue; + } + + if (argv[index][0] == '-') + { + if (strcmp(argv[index], "-v") == 0 || + strcmp(argv[index], "--version") == 0) + { + printf("%s\n", "cpp v1.11, (c) Western Company"); + return 0; + } + + if (strcmp(argv[index], "-h") == 0 || + strcmp(argv[index], "--help") == 0) + { + printf("%s\n", "cpp v1.11, (c) Western Company"); + printf("%s\n", "--working-dir: set directory to working path."); + printf("%s\n", "--include-dir: add directory to include path."); + printf("%s\n", "--define: define macro."); + + return 0; + } + + if (strcmp(argv[index], "--include-dir") == 0) + { + std::string inc = argv[index+1]; + + skip = true; + + kIncludes.push_back(inc); + } + + if (strcmp(argv[index], "--working-dir") == 0) + { + std::string inc = argv[index+1]; + skip = true; + kWoringDir = inc; + } + + if (strcmp(argv[index], "--define") == 0 && + argv[index + 1] != nullptr && + argv[index + 2] != nullptr) + { + std::string 
macro_key = argv[index + 1]; + + std::string macro_value; + bool is_string = false; + + for (int argv_find_len = 0; + argv_find_len < strlen(argv[index]); + ++argv_find_len) + { + if (!isdigit(argv[index][argv_find_len])) + { + is_string = true; + macro_value += "\""; + + break; + } + } + + macro_value += argv[index + 2]; + + if (is_string) + macro_value += "\""; + + details::cpp_macro macro; + macro.fName = macro_key; + macro.fValue = macro_value; + + kMacros.push_back(macro); + + double_skip = true; + } + + continue; + } + + kFiles.emplace_back(argv[index]); + } + + if (kFiles.empty()) + return CXXKIT_EXEC_ERROR; + + for (auto& file : kFiles) + { + if (!std::filesystem::exists(file)) + continue; + + std::ifstream file_descriptor(file); + std::ofstream file_descriptor_pp(file + ".pp"); + + cpp_parse_file(file_descriptor, file_descriptor_pp); + } + + return 0; + } + catch(const std::runtime_error& e) + { + std::cout << e.what() << '\n'; + } + + return 0; +} diff --git a/CompilerDriver/cpp.cxx b/CompilerDriver/cpp.cxx deleted file mode 100644 index 6986e27..0000000 --- a/CompilerDriver/cpp.cxx +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * ======================================================== - * - * cpp - * Copyright Western Company, all rights reserved. - * - * ======================================================== - */ - -#include -#include -#include -#include -#include - -typedef Int32(*cpp_parser_fn_t)(std::string& line, std::ifstream& hdr_file, std::ofstream& pp_out); - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Preprocessor internal types. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -namespace details -{ - enum - { - kEqual, - kGreaterEqThan, - kLesserEqThan, - kGreaterThan, - kLesserThan, - kNotEqual, - }; - - struct cpp_macro_condition final - { - int32_t fType; - std::string fTypeName; - }; - - struct cpp_macro final - { - std::vector fArgs; - std::string fName; - std::string fValue; - }; - - class cpp_pragma final - { - public: - explicit cpp_pragma() = default; - ~cpp_pragma() = default; - - CXXKIT_COPY_DEFAULT(cpp_pragma); - - std::string fMacroName{ "" }; - Int32(*fParse)(std::string& line, std::ifstream& hdr_file, std::ofstream& pp_out); - - }; -} - -static std::vector kFiles; -static std::vector kMacros; -static std::vector kIncludes; - -static std::string kWoringDir; - -static std::vector kKeywords = { - "include", - "if", - "pragma", - "define", - "elif", - "ifdef", - "ifndef", - "else", - "warning", - "error" -}; - -#define kKeywordCxxCnt kKeywords.size() - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name cpp_parse_if_condition -// @brief parse #if condition - -///////////////////////////////////////////////////////////////////////////////////////// - -int32_t cpp_parse_if_condition(details::cpp_macro_condition& cond, - details::cpp_macro& macro, - bool& inactive_code, bool& defined, - std::string& macro_str) -{ - if (cond.fType == details::kEqual) - { - auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); - - if (substr_macro.find(macro.fValue) != std::string::npos) - { - if (macro.fValue == "0") - { - defined = false; - inactive_code = true; - - return 1; - } - - defined = true; - inactive_code = false; - - return 1; - } - } - else if (cond.fType == details::kNotEqual) - { - auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); - - if (substr_macro.find(macro.fName) != std::string::npos) - { - if 
(substr_macro.find(macro.fValue) != std::string::npos) - { - defined = false; - inactive_code = true; - - return 1; - } - - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - auto substr_macro = macro_str.substr(macro_str.find(macro.fName) + macro.fName.size()); - - std::string number; - - for (auto& macro : kMacros) - { - if (substr_macro.find(macro.fName) != std::string::npos) - { - for (size_t i = 0; i < macro.fName.size(); ++i) - { - if (isdigit(macro.fValue[i])) - { - number += macro.fValue[i]; - } - else - { - number.clear(); - break; - } - } - - break; - } - } - - size_t y = 2; - - /* last try */ - for (; y < macro_str.size(); y++) - { - if (isdigit(macro_str[y])) - { - for (size_t x = y; x < macro_str.size(); x++) - { - if (macro_str[x] == ' ') - break; - - number += macro_str[x]; - } - - break; - } - } - - size_t rhs = atol(macro.fValue.c_str()); - size_t lhs = atol(number.c_str()); - - if (lhs == 0) - { - number.clear(); - ++y; - - for (; y < macro_str.size(); y++) - { - if (isdigit(macro_str[y])) - { - for (size_t x = y; x < macro_str.size(); x++) - { - if (macro_str[x] == ' ') - break; - - number += macro_str[x]; - } - - break; - } - } - - lhs = atol(number.c_str()); - } - - if (cond.fType == details::kGreaterThan) - { - if (lhs < rhs) - { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == details::kGreaterEqThan) - { - if (lhs <= rhs) - { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == details::kLesserEqThan) - { - if (lhs >= rhs) - { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == details::kLesserThan) - { - if (lhs > rhs) - { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - return 0; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief stores every included file here. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -std::vector kAllIncludes; - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name cpp_parse_file -// @brief parse file to preprocess it. - -///////////////////////////////////////////////////////////////////////////////////////// - -void cpp_parse_file(std::ifstream& hdr_file, std::ofstream& pp_out) -{ - std::string hdr_line; - std::string line_after_include; - - bool inactive_code = false; - bool comment = false; - bool defined = false; - bool else_branch = false; - - try - { - while (std::getline(hdr_file, hdr_line)) - { - // make cc, ccplus life easier - if (hdr_line.find("//") != std::string::npos) - { - hdr_line.erase(hdr_line.find("//")); - } - - if (hdr_line[0] == '#' && - hdr_line.find("endif") != std::string::npos) - { - if (!defined && - inactive_code) - { - inactive_code = false; - defined = false; - - continue; - } - - continue; - } - - if (!defined && - inactive_code) - { - continue; - } - - if (defined && - inactive_code) - { - continue; - } - - for (auto macro : kMacros) - { - if (ParserKit::find_word(hdr_line, macro.fName) && - hdr_line.find("#define") == std::string::npos) - { - hdr_line = hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), macro.fValue); - } - } - - if (hdr_line[0] == '#' && - hdr_line.find("define") != std::string::npos) - { - auto line_after_define = hdr_line.substr(hdr_line.find("define") + strlen("define") + 1); - - std::string macro_value; - std::string macro_key; - - std::size_t pos = 0UL; - - std::vector args; - bool on_args = false; - - for (auto& ch : line_after_define) - { - ++pos; - - if (ch == '(') - { - on_args = true; - continue; - } - - if (ch == ')') - { - on_args = false; - continue; - } - - if (ch == '\\') - continue; - - if (on_args) - continue; - - if (ch == ' ') - { - for (size_t i = pos; i < line_after_define.size(); i++) - { - macro_value += 
line_after_define[i]; - } - - break; - } - - macro_key += ch; - } - - for (auto& ch : line_after_define) - { - if (ch == '(') - { - std::string arg; - - for (size_t i = pos+1; i < line_after_define.size(); i++) - { - if (line_after_define[i] == ')') - break; - - if (line_after_define[i] == ' ') - continue; - - if (line_after_define[i] == ',') - { - args.push_back(arg); - arg.clear(); - - continue; - } - - arg += line_after_define[i]; - } - - break; - } - } - - details::cpp_macro macro; - - macro.fArgs = args; - macro.fName = macro_key; - macro.fValue = macro_value; - - kMacros.emplace_back(macro); - - continue; - } - - if (hdr_line[0] != '#') - { - if (inactive_code) - { - continue; - } - - for (auto& macro : kMacros) - { - if (hdr_line.find(macro.fName) != std::string::npos) - { - std::vector arg_values; - - if (macro.fArgs.size() > 0) - { - for (size_t i = 0; i < hdr_line.size(); ++i) - { - if (hdr_line[i] == '(') - { - std::string tmp_arg; - - for (size_t x = i; x < hdr_line.size(); x++) - { - if (hdr_line[x] == ')') - break; - - if (hdr_line[x] == ' ') - continue; - - if (hdr_line[i] == '\\') - continue; - - if (hdr_line[x] == ',') - { - arg_values.push_back(tmp_arg); - tmp_arg.clear(); - continue; - } - - tmp_arg += hdr_line[x]; - } - - break; - } - } - - std::string symbol; - - for (size_t i = 0; i < macro.fValue.size(); i++) - { - if (macro.fValue[i] == '(') - break; - - if (macro.fValue[i] == '\\') - continue; - - symbol += macro.fValue[i]; - } - - hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), symbol); - - size_t x_arg_indx = 0; - - for (size_t i = hdr_line.find(macro.fValue); i < hdr_line.size(); ++i) - { - if (hdr_line.find(macro.fArgs[x_arg_indx]) == i) - { - hdr_line.replace(i, macro.fArgs[x_arg_indx].size(), arg_values[x_arg_indx]); - ++x_arg_indx; - } - } - - } - else - { - std::string symbol; - - for (size_t i = 0; i < macro.fValue.size(); i++) - { - if (macro.fValue[i] == ' ') - continue; - - if (macro.fValue[i] == '\\') - 
continue; - - symbol += macro.fValue[i]; - } - - hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), symbol); - } - - break; - } - } - - pp_out << hdr_line << std::endl; - - continue; - } - - if (hdr_line[0] == '#' && - hdr_line.find("ifndef") != std::string::npos) - { - auto line_after_ifndef = hdr_line.substr(hdr_line.find("ifndef") + strlen("ifndef") + 1); - std::string macro; - - for (auto& ch : line_after_ifndef) - { - if (ch == ' ') - { - break; - } - - macro += ch; - } - - if (macro == "0") - { - defined = true; - inactive_code = false; - continue; - } - - if (macro == "1") - { - defined = false; - inactive_code = true; - - continue; - } - - bool found = false; - - defined = true; - inactive_code = false; - - for (auto& macro_ref : kMacros) - { - if (hdr_line.find(macro_ref.fName) != std::string::npos) - { - found = true; - break; - } - } - - if (found) - { - defined = false; - inactive_code = true; - - continue; - } - } - else if (hdr_line[0] == '#' && - hdr_line.find("else") != std::string::npos) - { - if (!defined && - inactive_code) - { - inactive_code = false; - defined = true; - - continue; - } - else - { - defined = false; - inactive_code = true; - - continue; - } - - else_branch = true; - } - else if (hdr_line[0] == '#' && - hdr_line.find("ifdef") != std::string::npos) - { - auto line_after_ifdef = hdr_line.substr(hdr_line.find("ifdef") + strlen("ifdef") + 1); - std::string macro; - - for (auto& ch : line_after_ifdef) - { - if (ch == ' ') - { - break; - } - - macro += ch; - } - - if (macro == "0") - { - defined = false; - inactive_code = true; - - continue; - } - - if (macro == "1") - { - defined = true; - inactive_code = false; - - continue; - } - - defined = false; - inactive_code = true; - - for (auto& macro_ref : kMacros) - { - if (hdr_line.find(macro_ref.fName) != std::string::npos) - { - defined = true; - inactive_code = false; - - break; - } - } - } - else if (hdr_line[0] == '#' && - hdr_line.find("pragma") != std::string::npos) - 
{ - line_after_include = hdr_line.substr(hdr_line.find("pragma once")); - - // search for this file - auto it = std::find(kAllIncludes.cbegin(), - kAllIncludes.cend(), line_after_include); - - if (it == kAllIncludes.cend()) - { - goto kIncludeFile; - } - } - else if (hdr_line[0] == '#' && - hdr_line.find("if") != std::string::npos) - { - inactive_code = true; - - std::vector cpp_macro_condition_list = { - { - .fType = details::kEqual, - .fTypeName = "==", - }, - { - .fType = details::kNotEqual, - .fTypeName = "!=", - }, - { - .fType = details::kLesserThan, - .fTypeName = "<", - }, - { - .fType = details::kGreaterThan, - .fTypeName = ">", - }, - { - .fType = details::kLesserEqThan, - .fTypeName = "<=", - }, - { - .fType = details::kGreaterEqThan, - .fTypeName = ">=", - }, - }; - - int32_t good_to_go = 0; - - for (auto& macro_condition : cpp_macro_condition_list) - { - if (hdr_line.find(macro_condition.fTypeName) != std::string::npos) - { - for (auto& found_macro : kMacros) - { - if (hdr_line.find(found_macro.fName) != std::string::npos) - { - good_to_go = cpp_parse_if_condition(macro_condition, found_macro, - inactive_code, defined, - hdr_line); - - break; - } - } - } - } - - if (good_to_go) - continue; - - auto line_after_if = hdr_line.substr(hdr_line.find("if") + strlen("if") + 1); - std::string macro; - - for (auto& ch : line_after_if) - { - if (ch == ' ') - { - break; - } - - macro += ch; - } - - if (macro == "0") - { - defined = false; - inactive_code = true; - continue; - } - - if (macro == "1") - { - defined = true; - inactive_code = false; - - continue; - } - - // last try, is it defined to be one? 
- for (auto& macro_ref : kMacros) - { - if (macro_ref.fName.find(macro) != std::string::npos && - macro_ref.fValue == "1") - { - inactive_code = false; - defined = true; - - break; - } - } - } - else if (hdr_line[0] == '#' && - hdr_line.find("warning") != std::string::npos) - { - auto line_after_warning = hdr_line.substr(hdr_line.find("warning") + strlen("warning") + 1); - std::string message; - - for (auto& ch : line_after_warning) - { - if (ch == '\r' || - ch == '\n') - { - break; - } - - message += ch; - } - - std::cout << "Warning: " << message << std::endl; - } - else if (hdr_line[0] == '#' && - hdr_line.find("error") != std::string::npos) - { - auto line_after_warning = hdr_line.substr(hdr_line.find("error") + strlen("error") + 1); - std::string message; - - for (auto& ch : line_after_warning) - { - if (ch == '\r' || - ch == '\n') - { - break; - } - - message += ch; - } - - throw std::runtime_error("Error: " + message); - } - else if (hdr_line[0] == '#' && - hdr_line.find("include") != std::string::npos) - { - line_after_include = hdr_line.substr(hdr_line.find("include")); - -kIncludeFile: - auto it = std::find(kAllIncludes.cbegin(), - kAllIncludes.cend(), line_after_include); - - if (it != kAllIncludes.cend()) - { - continue; - } - - std::string path; - - kAllIncludes.push_back(line_after_include); - - bool enable = false; - bool not_local = false; - - for (auto& ch : line_after_include) - { - if (ch == ' ') - continue; - - if (ch == '<') - not_local = true; - - if (ch == '\"' || - ch == '<') - { - enable = true; - continue; - } - - if (enable) - { - if (ch == '>' || - ch == '\"') - break; - - path += ch; - } - } - - if (not_local) - { - bool open = false; - - for (auto& include : kIncludes) - { - std::ifstream header(include + '/' + path); - - if (!header.is_open()) - continue; - - open = true; - - cpp_parse_file(header, pp_out); - - break; - } - - if (open == false) - { - throw std::runtime_error("cpp: no such include file: " + path); - } - } - else - { - 
std::ifstream header(kWoringDir + path); - - if (!header.is_open()) - throw std::runtime_error("cpp: no such include file: " + path); - - cpp_parse_file(header, pp_out); - } - } - else - { - std::cerr << ("cpp: unknown pre-processor directive, " + hdr_line) << "\n"; - continue; - } - } - } - catch (std::out_of_range& oor) - { - return; - } -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief main entrypoint of app. - -///////////////////////////////////////////////////////////////////////////////////////// - -int main(int argc, char** argv) -{ - try - { - bool skip = false; - bool double_skip = false; - - details::cpp_macro macro_1; - macro_1.fName = "__true"; - macro_1.fValue = "1"; - - kMacros.push_back(macro_1); - - details::cpp_macro macro_0; - macro_0.fName = "__false"; - macro_0.fValue = "0"; - - kMacros.push_back(macro_0); - - for (auto index = 1UL; index < argc; ++index) - { - if (skip) - { - skip = false; - continue; - } - - if (double_skip) - { - ++index; - double_skip = false; - continue; - } - - if (argv[index][0] == '-') - { - if (strcmp(argv[index], "-v") == 0 || - strcmp(argv[index], "--version") == 0) - { - printf("%s\n", "cpp v1.11, (c) Western Company"); - return 0; - } - - if (strcmp(argv[index], "-h") == 0 || - strcmp(argv[index], "--help") == 0) - { - printf("%s\n", "cpp v1.11, (c) Western Company"); - printf("%s\n", "--working-dir: set directory to working path."); - printf("%s\n", "--include-dir: add directory to include path."); - printf("%s\n", "--define: define macro."); - - return 0; - } - - if (strcmp(argv[index], "--include-dir") == 0) - { - std::string inc = argv[index+1]; - - skip = true; - - kIncludes.push_back(inc); - } - - if (strcmp(argv[index], "--working-dir") == 0) - { - std::string inc = argv[index+1]; - skip = true; - kWoringDir = inc; - } - - if (strcmp(argv[index], "--define") == 0 && - argv[index + 1] != nullptr && - argv[index + 2] != nullptr) - { - std::string 
macro_key = argv[index + 1]; - - std::string macro_value; - bool is_string = false; - - for (int argv_find_len = 0; - argv_find_len < strlen(argv[index]); - ++argv_find_len) - { - if (!isdigit(argv[index][argv_find_len])) - { - is_string = true; - macro_value += "\""; - - break; - } - } - - macro_value += argv[index + 2]; - - if (is_string) - macro_value += "\""; - - details::cpp_macro macro; - macro.fName = macro_key; - macro.fValue = macro_value; - - kMacros.push_back(macro); - - double_skip = true; - } - - continue; - } - - kFiles.emplace_back(argv[index]); - } - - if (kFiles.empty()) - return CXXKIT_EXEC_ERROR; - - for (auto& file : kFiles) - { - if (!std::filesystem::exists(file)) - continue; - - std::ifstream file_descriptor(file); - std::ofstream file_descriptor_pp(file + ".pp"); - - cpp_parse_file(file_descriptor, file_descriptor_pp); - } - - return 0; - } - catch(const std::runtime_error& e) - { - std::cout << e.what() << '\n'; - } - - return 0; -} diff --git a/CompilerDriver/ld.cc b/CompilerDriver/ld.cc new file mode 100644 index 0000000..d9e1ffe --- /dev/null +++ b/CompilerDriver/ld.cc @@ -0,0 +1,594 @@ +/* + * ======================================================== + * + * C++Kit + * Copyright Western Company, all rights reserved. + * + * ======================================================== + */ + +// @file ld.cxx +// @brief AE to PEF linker. +// Use this to compile to PEF compliant OS. + +// README: Do not look up for anything with .text/.data/.page_zero! +// It will be loaded when program will start up! +// Unlike $$dynamic$$ these containers will be loaded before CUS will do its job. + +#include + +#include +#include +#include + +//! Portable Executable Format +#include + +//! Advanced Executable Object Format +#include + +//! @brief standard PEF entry. 
+#define kPefStart "__start" + +#define kToolVersion "ld v1.17, (c) Western Company" + +#define StringCompare(dst, src) strcmp(dst, src) + +#define kPefNoCpu 0U +#define kPefNoSubCpu 0U + +#define kWhite "\e[0;97m" +#define kStdOut (std::cout << kWhite) + +#define kPefDeaultOrg (uint64_t)0x10000 +#define kPefLinkerNumId 0x5046FF +#define kPefAbiId "Container:Abi:MP-UX" + +enum { kAbiMpUx = 0x5046 /* PF */ }; + +std::ofstream& operator<<(std::ofstream& fp, CxxKit::PEFContainer& container) +{ + fp.write((char*)&container, sizeof(CxxKit::PEFContainer)); + return fp; +} + +std::ofstream& operator<<(std::ofstream& fp, CxxKit::PEFCommandHeader& container) +{ + fp.write((char*)&container, sizeof(CxxKit::PEFCommandHeader)); + return fp; +} + +static std::string kOutput = "a.out"; +static Int32 kAbi = kAbiMpUx; +static Int32 kSubArch = kPefNoSubCpu; +static Int32 kArch = kPefNoCpu; +static Bool kFatBinaryEnable = false; +static Bool kStartFound = false; +static Bool kDuplicateSymbols = false; +static Bool kVerbose = false; + +/* ld is to be found, mld is to be found at runtime. */ +static const char* kLdDefineSymbol = ":ld:"; +static const char* kLdDynamicSym = ":mld:"; + +/* object code and list. 
*/ +static std::vector kObjectList; +static std::vector kObjectBytes; + +int main(int argc, char** argv) +{ + bool is_executable = true; + + for (size_t i = 1; i < argc; ++i) + { + if (StringCompare(argv[i], "-h") == 0) + { + kStdOut << kToolVersion << "\n"; + kStdOut << "-v: Print program version.\n"; + kStdOut << "-verbose: Print program backtrace (verbose mode).\n"; + kStdOut << "-shared: Output as a shared library.\n"; + kStdOut << "-m64000: Link for the 64x0.\n"; + kStdOut << "-fatbin: Output as FAT PEF.\n"; + kStdOut << "-o: Select output filename.\n"; + + // bye + return 0; + } + else if (StringCompare(argv[i], "-v") == 0 || + StringCompare(argv[i], "--version") == 0) + { + kStdOut << kToolVersion << std::endl; + // bye :D + return 0; + } + // + // we continue our way if these conditions are met. + // they are not files and are just flags. + // don't forget the 'continue' after your code. + // + else if (StringCompare(argv[i], "-m64000") == 0) + { + kArch = CxxKit::kPefArch64000; + + continue; + } + else if (StringCompare(argv[i], "-fatbin") == 0) + { + kFatBinaryEnable = true; + + continue; + } + else if (StringCompare(argv[i], "-verbose") == 0) + { + kVerbose = true; + continue; + } + else if (StringCompare(argv[i], "-shared") == 0) + { + if (kOutput.find(".out") != std::string::npos) + kOutput.erase(kOutput.find(".out"), strlen(".out")); + + kOutput += ".lib"; + + is_executable = false; + + continue; + } + else if (StringCompare(argv[i], "-o") == 0) + { + kOutput = argv[i+1]; + ++i; + + continue; + } + else + { + kObjectList.emplace_back(argv[i]); + + continue; + } + + kStdOut << "ld: ignore flag: " << argv[i] << "\n"; + } + + // sanity check. + if (kObjectList.empty()) + { + kStdOut << "ld: no input files." << std::endl; + return CXXKIT_EXEC_ERROR; + } + else + { + // check for exisiting files. + for (auto& obj : kObjectList) + { + if (!std::filesystem::exists(obj)) + { + // if filesystem doesn't find file + // -> throw error. 
+ kStdOut << "ld: no such file: " << obj << std::endl; + return CXXKIT_EXEC_ERROR; + } + } + } + + // PEF expects a valid architecture when outputing a binary. + if (kArch == 0) + { + kStdOut << "ld: no target architecture set, can't continue." << std::endl; + return CXXKIT_EXEC_ERROR; + } + + CxxKit::PEFContainer pef_container{}; + + int32_t archs = kArch; + + pef_container.Count = 0UL; + pef_container.Kind = CxxKit::kPefKindExec; + pef_container.SubCpu = kSubArch; + pef_container.Linker = kPefLinkerNumId; // Western Company Linker + pef_container.Abi = kAbi; // Multi-Processor UX ABI + pef_container.Magic[0] = kPefMagic[kFatBinaryEnable ? 2 : 0]; + pef_container.Magic[1] = kPefMagic[1]; + pef_container.Magic[2] = kPefMagic[kFatBinaryEnable ? 0 : 2]; + pef_container.Version = kPefVersion; + + // specify the start address, can be 0x10000 + pef_container.Start = kPefDeaultOrg; + pef_container.HdrSz = sizeof(CxxKit::PEFContainer); + + std::ofstream output_fc(kOutput, std::ofstream::binary); + + if (output_fc.bad()) + { + if (kVerbose) + { + kStdOut << "ld: error: " << strerror(errno) << "\n"; + } + + return -CXXKIT_FILE_NOT_FOUND; + } + + //! Read AE to convert as PEF. + + std::vector pef_command_hdrs; + + for (const auto& i : kObjectList) + { + if (!std::filesystem::exists(i)) + continue; + + CxxKit::AEHeader hdr{}; + + std::ifstream input_object(i, std::ifstream::binary); + + input_object.read((char*)&hdr, sizeof(CxxKit::AEHeader)); + + auto ae_header = hdr; + + if (ae_header.fArch != kArch) + { + if (kVerbose) + kStdOut << "ld: pef: is a fat binary? : "; + + if (!kFatBinaryEnable) + { + if (kVerbose) + kStdOut << "no.\n"; + + kStdOut << "ld: error: object " << i << " is a different kind of architecture and output isn't treated as FAT binary." 
<< std::endl; + + std::remove(kOutput.c_str()); + return -CXXKIT_FAT_ERROR; + } + else + { + if (kVerbose) + { + kStdOut << "yes.\n"; + } + } + } + + if (ae_header.fMagic[0] == kAEMag0 && + ae_header.fMagic[1] == kAEMag1 && + ae_header.fSize == sizeof(CxxKit::AEHeader)) + { + // append arch type to archs varaible. + archs |= ae_header.fArch; + std::size_t cnt = ae_header.fCount; + + if (kVerbose) + kStdOut << "ld: object header found, record count: " << cnt << "\n"; + + pef_container.Count = cnt; + + char* raw_ae_records = new char[cnt * sizeof(CxxKit::AERecordHeader)]; + memset(raw_ae_records, 0, cnt * sizeof(CxxKit::AERecordHeader)); + + input_object.read(raw_ae_records, std::streamsize(cnt * sizeof(CxxKit::AERecordHeader))); + + auto* ae_records = (CxxKit::AERecordHeader*)raw_ae_records; + + for (size_t ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) + { + CxxKit::PEFCommandHeader command_header{ 0 }; + + memcpy(command_header.Name, ae_records[ae_record_index].fName, kPefNameLen); + + // check this header if it's any valid. 
+ if (std::string(command_header.Name).find(".text") == std::string::npos && + std::string(command_header.Name).find(".data") == std::string::npos && + std::string(command_header.Name).find(".page_zero") == std::string::npos) + { + if (std::string(command_header.Name).find(kPefStart) == std::string::npos && + *command_header.Name == 0) + { + if (std::string(command_header.Name).find(kLdDefineSymbol) != std::string::npos) + { + goto ld_mark_header; + } + else + { + continue; + } + } + } + + if (std::string(command_header.Name).find(kPefStart) != std::string::npos && + std::string(command_header.Name).find(".text") != std::string::npos) + { + kStartFound = true; + pef_container.Start = ae_records[ae_record_index].fOffset; + } + +ld_mark_header: + command_header.Offset = ae_records[ae_record_index].fOffset; + command_header.Kind = ae_records[ae_record_index].fKind; + command_header.Size = ae_records[ae_record_index].fSize; + + if (kVerbose) + kStdOut << "ld: object record: " << ae_records[ae_record_index].fName << " was marked.\n"; + + pef_command_hdrs.emplace_back(command_header); + } + + delete[] raw_ae_records; + + std::vector bytes; + bytes.resize(ae_header.fCodeSize); + + input_object.seekg(ae_header.fStartCode); + input_object.read(bytes.data(), ae_header.fCodeSize); + + for (auto& byte : bytes) + { + kObjectBytes.push_back(byte); + } + + continue; + } + + kStdOut << "ld: not an object: " << i << std::endl; + std::remove(kOutput.c_str()); + + // don't continue, it is a fatal error. + return -CXXKIT_EXEC_ERROR; + } + + pef_container.Cpu = archs; + + output_fc << pef_container; + + if (kVerbose) + { + kStdOut << "ld: pef: wrote container header.\n"; + } + + output_fc.seekp(std::streamsize(pef_container.HdrSz)); + + std::vector not_found; + std::vector symbols; + + // step 2: check for errors (multiple symbols, undefined ones) + + for (auto & pef_command_hdr : pef_command_hdrs) + { + // check if this symbol needs to be resolved. 
+ if (std::string(pef_command_hdr.Name).find(kLdDefineSymbol) != + std::string::npos && + std::string(pef_command_hdr.Name).find(kLdDynamicSym) == + std::string::npos) + { + if (kVerbose) + kStdOut << "ld: found undefined symbol: " << pef_command_hdr.Name << "\n"; + + if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdr.Name)); + it == not_found.end()) + { + not_found.emplace_back(pef_command_hdr.Name); + } + } + + symbols.emplace_back(pef_command_hdr.Name); + } + + // Now try to solve these symbols. + + for (size_t not_found_idx = 0; not_found_idx < pef_command_hdrs.size(); ++not_found_idx) + { + if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdrs[not_found_idx].Name)); + it != not_found.end()) + { + std::string symbol_imp = *it; + + if (symbol_imp.find(kLdDefineSymbol) == std::string::npos) + continue; + + // erase the lookup prefix. + symbol_imp.erase(0, symbol_imp.find(kLdDefineSymbol) + strlen(kLdDefineSymbol)); + + // demangle everything. + while (symbol_imp.find('$') != std::string::npos) + symbol_imp.erase(symbol_imp.find('$'), 1); + + // the reason we do is because, this may not match the symbol, and we need + // to look for other matching symbols. + for (auto& pef_command_hdr : pef_command_hdrs) + { + if (std::string(pef_command_hdr.Name).find(symbol_imp) != std::string::npos && + std::string(pef_command_hdr.Name).find(kLdDefineSymbol) == std::string::npos) + { + std::string undefined_symbol = pef_command_hdr.Name; + auto result_of_sym = undefined_symbol.substr(undefined_symbol.find(symbol_imp)); + + for (int i = 0; result_of_sym[i] != 0; ++i) + { + if (result_of_sym[i] != symbol_imp[i]) + goto ld_continue_search; + + } + + not_found.erase(it); + + if (kVerbose) + kStdOut << "ld: found symbol: " << pef_command_hdr.Name << "\n"; + + break; + } + } + +ld_continue_search: + continue; + } + } + + // step 3: check for errors (recheck if we have those symbols.) 
+ + if (!kStartFound && is_executable) + { + if (kVerbose) + kStdOut << "ld: undefined symbol: __start, you may have forget to link against your runtime library.\n"; + + kStdOut << "ld: undefined entrypoint " << kPefStart << " for executable " << kOutput << "\n"; + } + + // step 4: write some pef commands. + + CxxKit::PEFCommandHeader date_header{}; + + time_t timestamp = time(nullptr); + + std::string timestamp_str = "ContainerDate:"; + timestamp_str += std::to_string(timestamp); + + strcpy(date_header.Name, timestamp_str.c_str()); + + date_header.Flags = 0; + date_header.Kind = CxxKit::kPefData; + date_header.Offset = output_fc.tellp(); + date_header.Size = timestamp_str.size(); + + output_fc << date_header; + + CxxKit::PEFCommandHeader abi_header{}; + + memcpy(abi_header.Name, kPefAbiId, strlen(kPefAbiId)); + + abi_header.Size = strlen(kPefAbiId); + abi_header.Offset = output_fc.tellp(); + abi_header.Flags = 0; + abi_header.Kind = CxxKit::kPefLinkerID; + + output_fc << abi_header; + + CxxKit::PEFCommandHeader uuid_header{}; + + uuid_t uuid{ 0 }; + uuid_generate_random(uuid); + + memcpy(uuid_header.Name, "UUID_TYPE:4:", strlen("UUID_TYPE:4:")); + memcpy(uuid_header.Name + strlen("UUID_TYPE:4:"), uuid, 16); + + uuid_header.Size = 16; + uuid_header.Offset = output_fc.tellp(); + uuid_header.Flags = 0; + uuid_header.Kind = 0; + + output_fc << uuid_header; + + // prepare a symbol vector. + std::vector undefined_symbols; + std::vector duplicate_symbols; + + // Finally write down the command headers. + // And check for any duplications + for (size_t cmd_hdr = 0UL; cmd_hdr < pef_command_hdrs.size(); ++cmd_hdr) + { + if (std::string(pef_command_hdrs[cmd_hdr].Name).find(kLdDefineSymbol) != + std::string::npos && + std::string(pef_command_hdrs[cmd_hdr].Name).find(kLdDynamicSym) == + std::string::npos) + { + // ignore :ld: headers, they do not contain code. 
+ continue; + } + + std::string sym_name = pef_command_hdrs[cmd_hdr].Name; + + if (!sym_name.empty()) + { + undefined_symbols.emplace_back(sym_name); + } + + output_fc << pef_command_hdrs[cmd_hdr]; + + for (size_t cmd_hdr_sub = 0UL; cmd_hdr_sub < pef_command_hdrs.size(); ++cmd_hdr_sub) + { + if (cmd_hdr_sub == cmd_hdr) + continue; + + if (std::string(pef_command_hdrs[cmd_hdr_sub].Name).find(kLdDefineSymbol) != + std::string::npos && + std::string(pef_command_hdrs[cmd_hdr_sub].Name).find(kLdDynamicSym) == + std::string::npos) + { + // ignore :ld: headers, they do not contain code. + continue; + } + + auto& pef_command_hdr = pef_command_hdrs[cmd_hdr_sub]; + + if (pef_command_hdr.Name == std::string(pef_command_hdrs[cmd_hdr].Name)) + { + if (std::find(duplicate_symbols.cbegin(), duplicate_symbols.cend(), pef_command_hdr.Name) == duplicate_symbols.cend()) + { + duplicate_symbols.push_back(pef_command_hdr.Name); + } + + if (kVerbose) + kStdOut << "ld: found duplicate symbol: " << pef_command_hdr.Name << "\n"; + + kDuplicateSymbols = true; + } + } + } + + if (!duplicate_symbols.empty()) + { + for (auto& symbol : duplicate_symbols) + { + kStdOut << "ld: multiple symbols of " << symbol << ".\n"; + } + + std::remove(kOutput.c_str()); + return -CXXKIT_EXEC_ERROR; + } + + // step 2.5: write program bytes. 
+ + for (auto byte : kObjectBytes) + { + output_fc << byte; + } + + if (kVerbose) + kStdOut << "ld: wrote code for: " << kOutput << "\n"; + + // step 3: check if we have those symbols + + std::vector unreferenced_symbols; + + for (auto & pef_command_hdr : pef_command_hdrs) + { + if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdr.Name)); + it != not_found.end()) + { + unreferenced_symbols.emplace_back(pef_command_hdr.Name); + } + } + + if (!unreferenced_symbols.empty()) + { + for (auto& unreferenced_symbol : unreferenced_symbols) + { + kStdOut << "ld: undefined symbol " << unreferenced_symbol << "\n"; + } + } + + if (!kStartFound || + kDuplicateSymbols && + std::filesystem::exists(kOutput) || + !unreferenced_symbols.empty()) + { + if (kVerbose) + kStdOut << "ld: code for: " << kOutput << ", is corrupt, removing file...\n"; + + std::remove(kOutput.c_str()); + return -CXXKIT_EXEC_ERROR; + } + + return 0; +} + +// Last rev 3-1-24 \ No newline at end of file diff --git a/CompilerDriver/ld.cxx b/CompilerDriver/ld.cxx deleted file mode 100644 index d316fe3..0000000 --- a/CompilerDriver/ld.cxx +++ /dev/null @@ -1,587 +0,0 @@ -/* - * ======================================================== - * - * C++Kit - * Copyright Western Company, all rights reserved. - * - * ======================================================== - */ - -// @file ld.cxx -// @brief AE to PEF linker. -// Use this to compile to PEF compliant OS. - -// README: Do not look up for anything with .text/.data/.page_zero! -// It will be loaded when program will start up! -// Unlike $$dynamic$$ these containers will be loaded before CUS will do its job. - -#include - -#include -#include -#include - -//! Portable Executable Format -#include - -//! Advanced Executable Object Format -#include - -//! @brief standard PEF entry. 
-#define kPefStart "__start" - -#define kToolVersion "ld v1.15, (c) Western Company" - -#define StringCompare(dst, src) strcmp(dst, src) - -#define kPefNoCpu 0U -#define kPefNoSubCpu 0U - -#define kWhite "\e[0;97m" -#define kStdOut (std::cout << kWhite) - -#define kPefDeaultOrg (uint64_t)0x10000 -#define kPefLinkerNumId 0x5046FF -#define kPefAbiId "Container:Abi:MP-UX" - -enum { kAbiMpUx = 0x5046 /* PF */ }; - -std::ofstream& operator<<(std::ofstream& fp, CxxKit::PEFContainer& container) -{ - fp.write((char*)&container, sizeof(CxxKit::PEFContainer)); - return fp; -} - -std::ofstream& operator<<(std::ofstream& fp, CxxKit::PEFCommandHeader& container) -{ - fp.write((char*)&container, sizeof(CxxKit::PEFCommandHeader)); - return fp; -} - -static std::string kOutput = "a.out"; - -static Int32 kAbi = kAbiMpUx; -static Int32 kSubArch = kPefNoSubCpu; -static Int32 kArch = kPefNoCpu; -static Bool kFatBinaryEnable = false; -static Bool kStartFound = false; -static Bool kDuplicateSymbols = false; -static Bool kVerbose = false; - -/* ld is to be found, mld is to be found at runtime. */ -static const char* kLdDefineSymbol = ":ld:"; -static const char* kLdDynamicSym = ":mld:"; - -/* object code and list. 
*/ -static std::vector kObjectList; -static std::vector kObjectBytes; - -int main(int argc, char** argv) -{ - bool is_executable = true; - - for (size_t i = 1; i < argc; ++i) - { - if (StringCompare(argv[i], "-h") == 0) - { - kStdOut << kToolVersion << "\n"; - kStdOut << "-v: Print program version.\n"; - kStdOut << "-verbose: Print program backtrace (verbose mode).\n"; - kStdOut << "-shared: Output as a shared library.\n"; - kStdOut << "-m64000: Link for the X64000.\n"; - kStdOut << "-fatbin: Output as FAT PEF.\n"; - kStdOut << "-o: Select output filename.\n"; - - // bye - return 0; - } - else if (StringCompare(argv[i], "-v") == 0 || - StringCompare(argv[i], "--version") == 0) - { - kStdOut << kToolVersion << std::endl; - // bye :D - return 0; - } - // - // we continue our way if these conditions are met. - // they are not files and are just flags. - // don't forget the 'continue' after your code. - // - else if (StringCompare(argv[i], "-m64000") == 0) - { - kArch = CxxKit::kPefArch64000; - - continue; - } - else if (StringCompare(argv[i], "-fatbin") == 0) - { - kFatBinaryEnable = true; - - continue; - } - else if (StringCompare(argv[i], "-verbose") == 0) - { - kVerbose = true; - continue; - } - else if (StringCompare(argv[i], "-shared") == 0) - { - if (kOutput.find(".out") != std::string::npos) - kOutput.erase(kOutput.find(".out"), strlen(".out")); - - kOutput += ".lib"; - - is_executable = false; - - continue; - } - else if (StringCompare(argv[i], "-o") == 0) - { - kOutput = argv[i+1]; - ++i; - - continue; - } - else - { - kObjectList.emplace_back(argv[i]); - - continue; - } - - kStdOut << "ld: ignore flag: " << argv[i] << "\n"; - } - - // sanity check. - if (kObjectList.empty()) - { - kStdOut << "ld: no input files." << std::endl; - return CXXKIT_EXEC_ERROR; - } - else - { - // check for exisiting files. 
- for (auto& obj : kObjectList) - { - if (!std::filesystem::exists(obj)) - { - kStdOut << "ld: no such file: " << obj << std::endl; - return CXXKIT_EXEC_ERROR; - } - } - } - - if (kArch == 0) - { - kStdOut << "ld: no target architecture set, can't continue." << std::endl; - return CXXKIT_EXEC_ERROR; - } - - CxxKit::PEFContainer pef_container{}; - - pef_container.Count = 0UL; - pef_container.Kind = CxxKit::kPefKindExec; - pef_container.SubCpu = kSubArch; - pef_container.Cpu = kArch; - pef_container.Linker = kPefLinkerNumId; // Western Company Linker - pef_container.Abi = kAbi; // Multi-Processor UX ABI - pef_container.Magic[0] = kPefMagic[kFatBinaryEnable ? 2 : 0]; - pef_container.Magic[1] = kPefMagic[1]; - pef_container.Magic[2] = kPefMagic[kFatBinaryEnable ? 0 : 2]; - pef_container.Version = kPefVersion; - - // specify the start address. - pef_container.Start = kPefDeaultOrg; - pef_container.HdrSz = sizeof(CxxKit::PEFContainer); - - std::ofstream output_fc(kOutput, std::ofstream::binary); - - if (output_fc.bad()) - { - if (kVerbose) - { - kStdOut << "ld: error: " << strerror(errno) << "\n"; - } - - return -CXXKIT_FILE_NOT_FOUND; - } - - output_fc << pef_container; - - if (kVerbose) - { - kStdOut << "ld: PEF: wrote container header.\n"; - } - - //! Read AE to convert as PEF. - - std::vector pef_command_hdrs; - - for (const auto& i : kObjectList) - { - if (!std::filesystem::exists(i)) - continue; - - CxxKit::AEHeader hdr{}; - - std::ifstream input_object(i, std::ifstream::binary); - - input_object.read((char*)&hdr, sizeof(CxxKit::AEHeader)); - - auto ae_header = hdr; - - if (ae_header.fArch != kArch) - { - if (kVerbose) - kStdOut << "ld: PEF: is a fat binary? : "; - - if (!kFatBinaryEnable) - { - if (kVerbose) - kStdOut << "NO\n"; - - kStdOut << "ld: error: object " << i << " is a different kind of architecture and output isn't treated as FAT binary." 
<< std::endl; - - std::remove(kOutput.c_str()); - return -CXXKIT_FAT_ERROR; - } - else - { - if (kVerbose) - { - kStdOut << "YES\n"; - } - } - } - - if (ae_header.fMagic[0] == kAEMag0 && - ae_header.fMagic[1] == kAEMag1 && - ae_header.fSize == sizeof(CxxKit::AEHeader)) - { - std::size_t cnt = ae_header.fCount; - - if (kVerbose) - kStdOut << "ld: object header found, record count: " << cnt << "\n"; - - pef_container.Count = cnt; - - char* raw_ae_records = new char[cnt * sizeof(CxxKit::AERecordHeader)]; - memset(raw_ae_records, 0, cnt * sizeof(CxxKit::AERecordHeader)); - - input_object.read(raw_ae_records, std::streamsize(cnt * sizeof(CxxKit::AERecordHeader))); - - auto* ae_records = (CxxKit::AERecordHeader*)raw_ae_records; - - for (size_t ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) - { - CxxKit::PEFCommandHeader command_header{ 0 }; - - memcpy(command_header.Name, ae_records[ae_record_index].fName, kPefNameLen); - - // check this header if it's any valid. - if (std::string(command_header.Name).find(".text") == std::string::npos && - std::string(command_header.Name).find(".data") == std::string::npos && - std::string(command_header.Name).find(".page_zero") == std::string::npos) - { - if (std::string(command_header.Name).find(kPefStart) == std::string::npos && - *command_header.Name == 0) - { - if (std::string(command_header.Name).find(kLdDefineSymbol) != std::string::npos) - { - goto ld_mark_header; - } - else - { - continue; - } - } - } - - if (std::string(command_header.Name).find(kPefStart) != std::string::npos && - std::string(command_header.Name).find(".text") != std::string::npos) - { - kStartFound = true; - pef_container.Start = ae_records[ae_record_index].fOffset; - } - -ld_mark_header: - command_header.Offset = ae_records[ae_record_index].fOffset; - command_header.Kind = ae_records[ae_record_index].fKind; - command_header.Size = ae_records[ae_record_index].fSize; - - if (kVerbose) - kStdOut << "ld: object record: " << 
ae_records[ae_record_index].fName << " was marked.\n"; - - pef_command_hdrs.emplace_back(command_header); - } - - delete[] raw_ae_records; - - std::vector bytes; - bytes.resize(ae_header.fCodeSize); - - input_object.seekg(ae_header.fStartCode); - input_object.read(bytes.data(), ae_header.fCodeSize); - - for (auto& byte : bytes) - { - kObjectBytes.push_back(byte); - } - - continue; - } - - kStdOut << "ld: not an object " << i << std::endl; - std::remove(kOutput.c_str()); - - // don't continue, it is a fatal error. - return -CXXKIT_EXEC_ERROR; - } - - output_fc.seekp(std::streamsize(pef_container.HdrSz)); - - std::vector not_found; - std::vector symbols; - - // step 2: check for errors (multiple symbols, undefined ones) - - for (auto & pef_command_hdr : pef_command_hdrs) - { - // check if this symbol needs to be resolved. - if (std::string(pef_command_hdr.Name).find(kLdDefineSymbol) != - std::string::npos && - std::string(pef_command_hdr.Name).find(kLdDynamicSym) == - std::string::npos) - { - if (kVerbose) - kStdOut << "ld: found undefined symbol: " << pef_command_hdr.Name << "\n"; - - if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdr.Name)); - it == not_found.end()) - { - not_found.emplace_back(pef_command_hdr.Name); - } - } - - symbols.emplace_back(pef_command_hdr.Name); - } - - // Now try to solve these symbols. - - for (size_t not_found_idx = 0; not_found_idx < pef_command_hdrs.size(); ++not_found_idx) - { - if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdrs[not_found_idx].Name)); - it != not_found.end()) - { - std::string symbol_imp = *it; - - if (symbol_imp.find(kLdDefineSymbol) == std::string::npos) - continue; - - // erase the lookup prefix. - symbol_imp.erase(0, symbol_imp.find(kLdDefineSymbol) + strlen(kLdDefineSymbol)); - - // demangle everything. 
- while (symbol_imp.find('$') != std::string::npos) - symbol_imp.erase(symbol_imp.find('$'), 1); - - // the reason we do is because, this may not match the symbol, and we need - // to look for other matching symbols. - for (auto& pef_command_hdr : pef_command_hdrs) - { - if (std::string(pef_command_hdr.Name).find(symbol_imp) != std::string::npos && - std::string(pef_command_hdr.Name).find(kLdDefineSymbol) == std::string::npos) - { - std::string undefined_symbol = pef_command_hdr.Name; - auto result_of_sym = undefined_symbol.substr(undefined_symbol.find(symbol_imp)); - - for (int i = 0; result_of_sym[i] != 0; ++i) - { - if (result_of_sym[i] != symbol_imp[i]) - goto ld_continue_search; - - } - - not_found.erase(it); - - if (kVerbose) - kStdOut << "ld: found symbol: " << pef_command_hdr.Name << "\n"; - - break; - } - } - -ld_continue_search: - continue; - } - } - - // step 3: check for errors (recheck if we have those symbols.) - - if (!kStartFound && is_executable) - { - if (kVerbose) - kStdOut << "ld: undefined symbol: __start, you may have forget to link against your runtime library.\n"; - - kStdOut << "ld: undefined entrypoint " << kPefStart << " for executable " << kOutput << "\n"; - } - - // step 4: write some pef commands. 
- - CxxKit::PEFCommandHeader date_header{}; - - time_t timestamp = time(nullptr); - - std::string timestamp_str = "ContainerDate:"; - timestamp_str += std::to_string(timestamp); - - strcpy(date_header.Name, timestamp_str.c_str()); - - date_header.Flags = 0; - date_header.Kind = CxxKit::kPefData; - date_header.Offset = output_fc.tellp(); - date_header.Size = timestamp_str.size(); - - output_fc << date_header; - - CxxKit::PEFCommandHeader abi_header{}; - - memcpy(abi_header.Name, kPefAbiId, strlen(kPefAbiId)); - - abi_header.Size = strlen(kPefAbiId); - abi_header.Offset = output_fc.tellp(); - abi_header.Flags = 0; - abi_header.Kind = CxxKit::kPefLinkerID; - - output_fc << abi_header; - - CxxKit::PEFCommandHeader uuid_header{}; - - uuid_t uuid{ 0 }; - uuid_generate_random(uuid); - - memcpy(uuid_header.Name, "UUID_TYPE:4:", strlen("UUID_TYPE:4:")); - memcpy(uuid_header.Name + strlen("UUID_TYPE:4:"), uuid, 16); - - uuid_header.Size = 16; - uuid_header.Offset = output_fc.tellp(); - uuid_header.Flags = 0; - uuid_header.Kind = 0; - - output_fc << uuid_header; - - // prepare a symbol vector. - std::vector undefined_symbols; - std::vector duplicate_symbols; - - // Finally write down the command headers. - // And check for any duplications - for (size_t cmd_hdr = 0UL; cmd_hdr < pef_command_hdrs.size(); ++cmd_hdr) - { - if (std::string(pef_command_hdrs[cmd_hdr].Name).find(kLdDefineSymbol) != - std::string::npos && - std::string(pef_command_hdrs[cmd_hdr].Name).find(kLdDynamicSym) == - std::string::npos) - { - // ignore :ld: headers, they do not contain code. 
- continue; - } - - std::string sym_name = pef_command_hdrs[cmd_hdr].Name; - - if (!sym_name.empty()) - { - undefined_symbols.emplace_back(sym_name); - } - - output_fc << pef_command_hdrs[cmd_hdr]; - - for (size_t cmd_hdr_sub = 0UL; cmd_hdr_sub < pef_command_hdrs.size(); ++cmd_hdr_sub) - { - if (cmd_hdr_sub == cmd_hdr) - continue; - - if (std::string(pef_command_hdrs[cmd_hdr_sub].Name).find(kLdDefineSymbol) != - std::string::npos && - std::string(pef_command_hdrs[cmd_hdr_sub].Name).find(kLdDynamicSym) == - std::string::npos) - { - // ignore :ld: headers, they do not contain code. - continue; - } - - auto& pef_command_hdr = pef_command_hdrs[cmd_hdr_sub]; - - if (pef_command_hdr.Name == std::string(pef_command_hdrs[cmd_hdr].Name)) - { - if (std::find(duplicate_symbols.cbegin(), duplicate_symbols.cend(), pef_command_hdr.Name) == duplicate_symbols.cend()) - { - duplicate_symbols.push_back(pef_command_hdr.Name); - } - - if (kVerbose) - kStdOut << "ld: found duplicate symbol: " << pef_command_hdr.Name << "\n"; - - kDuplicateSymbols = true; - } - } - } - - if (!duplicate_symbols.empty()) - { - for (auto& symbol : duplicate_symbols) - { - kStdOut << "ld: multiple symbols of " << symbol << ".\n"; - } - - std::remove(kOutput.c_str()); - return -CXXKIT_EXEC_ERROR; - } - - // step 2.5: write program bytes. 
- - for (auto byte : kObjectBytes) - { - output_fc << byte; - } - - if (kVerbose) - kStdOut << "ld: wrote code for: " << kOutput << "\n"; - - // step 3: check if we have those symbols - - std::vector unreferenced_symbols; - - for (auto & pef_command_hdr : pef_command_hdrs) - { - if (auto it = std::find(not_found.begin(), not_found.end(), std::string(pef_command_hdr.Name)); - it != not_found.end()) - { - unreferenced_symbols.emplace_back(pef_command_hdr.Name); - } - } - - if (!unreferenced_symbols.empty()) - { - for (auto& unreferenced_symbol : unreferenced_symbols) - { - kStdOut << "ld: undefined symbol " << unreferenced_symbol << "\n"; - } - } - - if (!kStartFound || - kDuplicateSymbols && - std::filesystem::exists(kOutput) || - !unreferenced_symbols.empty()) - { - if (kVerbose) - kStdOut << "ld: code for: " << kOutput << ", is corrupt, removing file...\n"; - - std::remove(kOutput.c_str()); - return -CXXKIT_EXEC_ERROR; - } - - return 0; -} - -// Last rev 3-1-24 \ No newline at end of file diff --git a/CompilerDriver/makefile b/CompilerDriver/makefile index f22ded6..e1060d5 100644 --- a/CompilerDriver/makefile +++ b/CompilerDriver/makefile @@ -9,30 +9,31 @@ LINK_CC=g++ -std=c++20 LINK_INC=-I../ -I../C++Kit -LINK_SRC=ld.cxx +LINK_SRC=ld.cc LINK_OUTPUT=bin/ld +LINK_ALT_OUTPUT=bin/mld -PP_SRC=cpp.cxx +PP_SRC=cpp.cc PP_OUTPUT=bin/cpp CC2_OUTPUT=bin/cppfront CC2_SRC=cc2/source/cppfront.cpp -CC_SRC=ccplus.cxx ../C++Kit/StdKit/*.cpp ../C++Kit/AsmKit/*.cpp +CC_SRC=ccplus.cc ../C++Kit/StdKit/*.cc ../C++Kit/AsmKit/*.cc CC_OUTPUT=bin/ccplus -CC_SRC=cc.cxx ../C++Kit/StdKit/*.cpp ../C++Kit/AsmKit/*.cpp +CC_SRC=cc.cc ../C++Kit/StdKit/*.cc ../C++Kit/AsmKit/*.cc CC_OUTPUT=bin/cc -CXX_SRC=ccplus.cxx ../C++Kit/StdKit/*.cpp ../C++Kit/AsmKit/*.cpp +CXX_SRC=ccplus.cc ../C++Kit/StdKit/*.cc ../C++Kit/AsmKit/*.cc CXX_OUTPUT=bin/ccplus -MASM_SRC=masm.cxx ../C++Kit/StdKit/*.cpp ../C++Kit/AsmKit/*.cpp +MASM_SRC=masm.cc ../C++Kit/StdKit/*.cc ../C++Kit/AsmKit/*.cc MASM_OUTPUT=bin/masm .PHONY: 
all all: cxx - @echo "[make] done build" + @echo "[make] done build." .PHONY: cxx cxx: ld @@ -45,6 +46,7 @@ cxx: ld .PHONY: ld ld: $(LINK_CC) $(LINK_INC) $(LINK_SRC) -o $(LINK_OUTPUT) + cp $(LINK_OUTPUT) $(LINK_ALT_OUTPUT) .PHONY: help help: diff --git a/CompilerDriver/masm.cc b/CompilerDriver/masm.cc new file mode 100644 index 0000000..849e141 --- /dev/null +++ b/CompilerDriver/masm.cc @@ -0,0 +1,957 @@ +/* + * ======================================================== + * + * C++Kit + * Copyright Western Company, all rights reserved. + * + * ======================================================== + */ + +///////////////////////////////////////////////////////////////////////////////////////// + +// @file masm.cxx +// @brief MP-UX 64x0 Assembler. + +// REMINDER: when dealing with an undefined symbol use (string size):ld:(string) +// so that ld will look for it. + +///////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#include + +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +#define kBlank "\e[0;30m" +#define kRed "\e[0;31m" +#define kWhite "\e[0;97m" +#define kYellow "\e[0;33m" + +#define kStdOut (std::cout << kWhite) + +static char kOutputArch = CxxKit::kPefArch64000; + +//! base relocation address for every mp-ux app. +static UInt32 kErrorLimit = 10; +static UInt32 kAcceptableErrors = 0; + +static std::size_t kCounter = 1UL; + +static bool kVerbose = false; + +static std::vector kBytes; +static CxxKit::AERecordHeader kCurrentRecord{ .fName = "", .fKind = CxxKit::kPefCode, .fSize = 0, .fOffset = 0 }; + +static std::vector kRecords; +static std::vector kUndefinedSymbols; + +static const std::string kUndefinedSymbol = ":ld:"; +static const std::string kRelocSymbol = ":mld:"; + +// \brief forward decl. 
+static std::string masm_check_line(std::string& line, const std::string& file); +static bool masm_read_attributes(std::string& line); +static void masm_read_instruction(std::string& line, const std::string& file); + +namespace detail +{ + void print_error(std::string reason, const std::string& file) noexcept + { + if (reason[0] == '\n') + reason.erase(0, 1); + + kStdOut << kRed << "[ masm ] " << kWhite << ((file == "masm") ? "internal assembler error " : ("in file, " + file)) << kBlank << std::endl; + kStdOut << kRed << "[ masm ] " << kWhite << reason << kBlank << std::endl; + + if (kAcceptableErrors > kErrorLimit) + std::exit(3); + + ++kAcceptableErrors; + } + + void print_warning(std::string reason, const std::string& file) noexcept + { + if (reason[0] == '\n') + reason.erase(0, 1); + + if (!file.empty()) + { + kStdOut << kYellow << "[ file ] " << kWhite << file << kBlank << std::endl; + } + + kStdOut << kYellow << "[ masm ] " << kWhite << reason << kBlank << std::endl; + } +} + +// provide operator<< for AE + +std::ofstream& operator<<(std::ofstream& fp, CxxKit::AEHeader& container) +{ + fp.write((char*)&container, sizeof(CxxKit::AEHeader)); + + return fp; +} + +std::ofstream& operator<<(std::ofstream& fp, CxxKit::AERecordHeader& container) +{ + fp.write((char*)&container, sizeof(CxxKit::AERecordHeader)); + + return fp; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Main entrypoint. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +int main(int argc, char** argv) +{ + for (size_t i = 1; i < argc; ++i) + { + if (argv[i][0] == '-') + { + if (strcmp(argv[i], "-v") == 0) + { + kStdOut << "masm: The MP-UX Assembler.\nmasm: v1.10\nmasm: Copyright (c) 2023 Western Company.\n"; + return 0; + } + + if (strcmp(argv[i], "-h") == 0) + { + kStdOut << "masm: The MP-UX Assembler.\nmasm: Copyright (c) 2023 Western Company.\n"; + kStdOut << "-v: Print program version.\n"; + kStdOut << "-verbose: Print verbose output.\n"; + kStdOut << "-m64000: Compile for the X64000 instruction set.\n"; + + return 0; + } + else if (strcmp(argv[i], "-m64000") == 0 || + strcmp(argv[i], "-m64x0") == 0) + { + kOutputArch = CxxKit::kPefArch64000; + + if (kVerbose) + { + kStdOut << "masm: Select 64x0 as object output.\n"; + } + + continue; + } + else if (strcmp(argv[i], "-verbose") == 0) + { + kVerbose = true; + continue; + } + + kStdOut << "masm: ignore " << argv[i] << "\n"; + continue; + } + + if (!std::filesystem::exists(argv[i])) + continue; + + std::string object_output(argv[i]); + + if (object_output.find(kAsmFileExt64x0) != std::string::npos) + { + object_output.erase(object_output.find(kAsmFileExt64x0), std::size(kAsmFileExt64x0)); + } + + object_output += kObjectFileExt; + + std::ifstream file_ptr(argv[i]); + std::ofstream file_ptr_out(object_output, + std::ofstream::binary); + + if (file_ptr_out.bad()) + { + if (kVerbose) + { + kStdOut << "masm: error: " << strerror(errno) << "\n"; + } + } + + std::string line; + + CxxKit::AEHeader hdr{ 0 }; + + memset(hdr.fPad, kAEInvalidOpcode, kAEPad); + + hdr.fMagic[0] = kAEMag0; + hdr.fMagic[1] = kAEMag1; + hdr.fSize = sizeof(CxxKit::AEHeader); + hdr.fArch = kOutputArch; + + ///////////////////////////////////////////////////////////////////////////////////////// + + // COMPILATION LOOP + + ///////////////////////////////////////////////////////////////////////////////////////// + + while 
(std::getline(file_ptr, line)) + { + if (auto ln = masm_check_line(line, argv[i]); + !ln.empty()) + { + detail::print_error(ln, argv[i]); + continue; + } + + try + { + masm_read_attributes(line); + masm_read_instruction(line, argv[i]); + } + catch(const std::exception& e) + { + if (kVerbose) + { + std::string what = e.what(); + detail::print_warning("exit because of: " + what, "masm"); + } + + std::filesystem::remove(object_output); + goto masm_fail_exit; + } + + } + + if (kVerbose) + kStdOut << "masm: writing to file...\n"; + + // this is the final step, write everything to the file. + + auto pos = file_ptr_out.tellp(); + + hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); + + file_ptr_out << hdr; + + if (kRecords.empty()) + { + std::filesystem::remove(object_output); + return -1; + } + + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + std::size_t record_count = 0UL; + + for (auto& rec : kRecords) + { + if (kVerbose) + kStdOut << "masm: wrote record " << rec.fName << " to file...\n"; + + rec.fFlags |= CxxKit::kKindRelocationAtRuntime; + rec.fOffset = record_count; + ++record_count; + + file_ptr_out << rec; + } + + // increment once again, so that we won't lie about the kUndefinedSymbols. + ++record_count; + + for (auto& sym : kUndefinedSymbols) + { + CxxKit::AERecordHeader _record_hdr{ 0 }; + + if (kVerbose) + kStdOut << "masm: wrote symbol " << sym << " to file...\n"; + + _record_hdr.fKind = kAEInvalidOpcode; + _record_hdr.fSize = sym.size(); + _record_hdr.fOffset = record_count; + + ++record_count; + + memset(_record_hdr.fPad, kAEInvalidOpcode, kAEPad); + memcpy(_record_hdr.fName, sym.c_str(), sym.size()); + + file_ptr_out << _record_hdr; + + ++kCounter; + } + + auto pos_end = file_ptr_out.tellp(); + + file_ptr_out.seekp(pos); + + hdr.fStartCode = pos_end; + hdr.fCodeSize = kBytes.size(); + + file_ptr_out << hdr; + + file_ptr_out.seekp(pos_end); + + // byte from byte, we write this. 
+ for (auto& byte : kBytes) + { + file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); + } + + if (kVerbose) + kStdOut << "masm: wrote program bytes to file...\n"; + + file_ptr_out.flush(); + file_ptr_out.close(); + + if (kVerbose) + kStdOut << "masm: exit succeeded with code 0.\n"; + + return 0; + } + +masm_fail_exit: + + if (kVerbose) + kStdOut << "masm: exit failed with code -1.\n"; + + return -1; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for attributes +// returns true if any was found. + +///////////////////////////////////////////////////////////////////////////////////////// + +static bool masm_read_attributes(std::string& line) +{ + // __import is the opposite of export, it signals to the ld + // that we need this symbol. + if (ParserKit::find_word(line, "__import")) + { + auto name = line.substr(line.find("__import") + strlen("__import")); + + std::string result = std::to_string(name.size()); + result += kUndefinedSymbol; + + // mangle this + for (char & j : name) + { + if (j == ' ' || + j == ',') + j = '$'; + + } + + result += name; + + if (name.find(".text") != std::string::npos) + { + // data is treated as code. + kCurrentRecord.fKind = CxxKit::kPefCode; + } + else if (name.find(".data") != std::string::npos) + { + // no code will be executed from here. + kCurrentRecord.fKind = CxxKit::kPefData; + } + else if (name.find(".page_zero") != std::string::npos) + { + // this is a bss section. + kCurrentRecord.fKind = CxxKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that ld can find it. + + if (name == "__start") + { + kCurrentRecord.fKind = CxxKit::kPefCode; + } + + // now we can tell the code size of the previous kCurrentRecord. 
+ + if (!kRecords.empty()) + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, result.c_str(), result.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAEInvalidOpcode, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + // __export is a special keyword used by masm to tell the AE output stage to mark this section as a header. + // it currently supports .text, .data., page_zero + if (ParserKit::find_word(line, "__export")) + { + auto name = line.substr(line.find("__export") + strlen("__export")); + + for (char& j : name) + { + if (j == ' ') + j = '$'; + + } + + if (name.find(',') != std::string::npos) + name.erase(name.find(',')); + + if (name.find(".text") != std::string::npos) + { + // data is treated as code. + kCurrentRecord.fKind = CxxKit::kPefCode; + } + else if (name.find(".data") != std::string::npos) + { + // no code will be executed from here. + kCurrentRecord.fKind = CxxKit::kPefData; + } + else if (name.find(".page_zero") != std::string::npos) + { + // this is a bss section. + kCurrentRecord.fKind = CxxKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that ld can find it. + + if (name == "__start") + { + kCurrentRecord.fKind = CxxKit::kPefCode; + } + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, name.c_str(), name.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAEInvalidOpcode, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + return false; +} + +// \brief algorithms and helpers. + +namespace detail::algorithm +{ + // \brief authorize a brief set of characters. 
+ static inline bool is_not_alnum_space(char c) + { + return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || + (c == '(') || (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') + || (c == '+') || (c == '_')); + } + + bool is_valid(const std::string &str) + { + if (ParserKit::find_word(str, "__export") || + ParserKit::find_word(str, "__import")) + return true; + + return find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); + } +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for line (syntax check) + +///////////////////////////////////////////////////////////////////////////////////////// + +static std::string masm_check_line(std::string& line, const std::string& file) +{ + (void)file; + + std::string err_str; + + while (line.find('\t') != std::string::npos) + line.erase(line.find('\t'), 1); + + if (line.empty() || + ParserKit::find_word(line, "__import") || + ParserKit::find_word(line, "__export") || + ParserKit::find_word(line, "#") || + ParserKit::find_word(line, ";") || + ParserKit::find_word(line, "layout")) + { + if (line.find('#') != std::string::npos) + { + line.erase(line.find('#')); + } + + if (line.find(';') != std::string::npos) + { + line.erase(line.find(';')); + } + + return err_str; + } + + if (!detail::algorithm::is_valid(line)) + { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + + return err_str; + } + + // check for a valid instruction format. 
+ + if (line.find(',') != std::string::npos) + { + if (line.find(',') + 1 == line.size()) + { + err_str += "\ninstruction lacks right register, here -> "; + err_str += line.substr(line.find(',')); + + return err_str; + } + else + { + bool nothing_on_right = true; + + if (line.find(',') + 1 > line.size()) + { + err_str += "\ninstruction not complete, here -> "; + err_str += line; + + return err_str; + } + + auto substr = line.substr(line.find(',') + 1); + + for (auto& ch : substr) + { + if (ch != ' ' && + ch != '\t') + { + nothing_on_right = false; + } + } + + // this means we found nothing after that ',' . + if (nothing_on_right) + { + err_str += "\ninstruction not complete, here -> "; + err_str += line; + + return err_str; + } + } + } + + std::vector opcodes_list = { "jb", "psh", "stw", "ldw", "lda", "sta" }; + + for (auto& opcodes : kOpcodes64x0) + { + if (line.find(opcodes.fName) != std::string::npos) + { + for (auto& op : opcodes_list) + { + if (line == op || + line.find(op) != std::string::npos && + !isspace(line[line.find(op) + op.size()])) + { + err_str += "\nmalformed "; + err_str += op; + err_str += " instruction, here -> "; + err_str += line; + } + } + + return err_str; + } + } + + err_str += "Unknown syntax: "; + err_str += line; + + return err_str; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief internal namespace + +///////////////////////////////////////////////////////////////////////////////////////// + +namespace detail +{ + union number_cast + { + explicit number_cast(UInt64 raw) + : raw(raw) + {} + + char number[8]; + UInt64 raw; + }; +} + +static bool masm_write_number(std::size_t pos, std::string& jump_label) +{ + if (!isdigit(jump_label[pos])) + return false; + + switch (jump_label[pos+1]) + { + case 'x': + { + if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 16); + !res) + { + if (errno != 0) + { + detail::print_error("invalid hex number: " + jump_label, "masm"); 
+ throw std::runtime_error("invalid_hex"); + + return false; + } + } + + detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 16)); + + for (char& i : num.number) + { + kBytes.push_back(i); + } + + if (kVerbose) + { + kStdOut << "masm: found a base 16 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; + } + case 'b': + { + if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 2); + !res) + { + if (errno != 0) + { + detail::print_error("invalid binary number: " + jump_label, "masm"); + throw std::runtime_error("invalid_bin"); + + return false; + } + } + + detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 2)); + + if (kVerbose) + { + kStdOut << "masm: found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) + { + kBytes.push_back(i); + } + + return true; + } + case 'o': + { + if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 7); + !res) + { + if (errno != 0) + { + detail::print_error("invalid octal number: " + jump_label, "masm"); + throw std::runtime_error("invalid_octal"); + + return false; + } + } + + detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), + nullptr, 7)); + + if (kVerbose) + { + kStdOut << "masm: found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) + { + kBytes.push_back(i); + } + + return true; + } + default: + { + break; + } + } + + /* check for errno and stuff like that */ + if (auto res = strtoq(jump_label.substr(pos).c_str(), + nullptr, 10); + !res) + { + if (errno != 0) + { + return false; + } + } + + detail::number_cast num(strtoq(jump_label.substr(pos).c_str(), + nullptr, 10)); + + for (char& i : num.number) + { + kBytes.push_back(i); + } + + if (kVerbose) + { + kStdOut << "masm: found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + 
+///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Read and write instruction to kBytes array. + +///////////////////////////////////////////////////////////////////////////////////////// + +static void masm_read_instruction(std::string& line, const std::string& file) +{ + for (auto& opcodes : kOpcodes64x0) + { + if (ParserKit::find_word(line, opcodes.fName)) + { + std::string name(opcodes.fName); + std::string jump_label, cpy_jump_label; + + kBytes.emplace_back(opcodes.fOpcode); + kBytes.emplace_back(opcodes.fFunct3); + kBytes.emplace_back(opcodes.fFunct7); + + // check funct7 + switch (opcodes.fFunct7) + { + // reg to reg means register to register transfer operation. + case kAsmRegToReg: + case kAsmImmediate: + { + // \brief how many registers we found. + std::size_t found_some = 0UL; + + for (size_t line_index = 0UL; line_index < line.size(); line_index++) + { + if (line[line_index] == 'r' && + isdigit(line[line_index + 1])) + { + std::string register_syntax = kAsmRegisterPrefix; + register_syntax += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) + register_syntax += line[line_index + 2]; + + std::string reg_str; + reg_str += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) + reg_str += line[line_index + 2]; + + std::size_t reg_index = strtoq( + reg_str.c_str(), + nullptr, + 10); + + if (reg_index > kAsmRegisterLimit) + { + detail::print_error("invalid register index, r" + reg_str, file); + throw std::runtime_error("invalid_register_index"); + } + + kBytes.emplace_back(reg_index); + ++found_some; + + if (kVerbose) + { + kStdOut << "masm: Found register: " << register_syntax << "\n"; + kStdOut << "masm: Register count: " << found_some << "\n"; + } + } + } + + // we're not in immediate addressing, reg to reg. + if (opcodes.fFunct7 != kAsmImmediate) + { + // remember! register to register! 
+ if (found_some == 1) + { + detail::print_error("unrecognized register found.\ntip: each masm register starts with 'r'.\nline: " + line, file); + throw std::runtime_error("not_a_register"); + } + } + + if (found_some < 1 && + name != "psh" && + name != "ldw" && + name != "lda" && + name != "stw") + { + detail::print_error("invalid combination of opcode and registers.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_reg"); + } + + if (found_some > 0 && + name == "pop") + { + detail::print_error("invalid combination for opcode 'pop'.\ntip: it expects nothing.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_pop"); + } + } + default: + break; + + } + + // try to fetch a number from the name + if (name == "psh" || + name == "jb" || + name == "stw" || + name == "ldw" || + name == "lda" || + name == "sta") + { + auto where_string = name; + + if (name == "stw" || + name == "ldw" || + name == "lda") + where_string = ","; + + jump_label = line.substr(line.find(where_string) + where_string.size()); + cpy_jump_label = jump_label; + + // replace any spaces with $ + if (jump_label[0] == ' ') + { + while (jump_label.find(' ') != std::string::npos) + { + if (isalnum(jump_label[0]) || + isdigit(jump_label[0])) + break; + + jump_label.erase(jump_label.find(' '), 1); + } + } + + if (!masm_write_number(0, jump_label)) + { + // sta expects this: sta 0x000000, r0 + if (name == "sta") + { + detail::print_error("invalid combination of opcode and operands.\nhere ->" + line, file); + throw std::runtime_error("invalid_comb_op_ops"); + + break; + } + + goto masm_write_label; + } + else + { + if (name == "sta" && + cpy_jump_label.find("__import") != std::string::npos) + { + detail::print_error("invalid usage __import on 'sta', here: " + line, file); + throw std::runtime_error("invalid_sta_usage"); + break; + } + } + } + + // if jump to branch + if (name == "jb") + { +masm_write_label: + if (cpy_jump_label.find('\n') != std::string::npos) + 
cpy_jump_label.erase(cpy_jump_label.find('\n'), 1); + + if (cpy_jump_label.find("__import") == std::string::npos && + name == "psh" || + cpy_jump_label.find("__import") == std::string::npos && + name == "jb") + { + detail::print_error("__import not found on jump label, please add one.", file.c_str()); + throw std::runtime_error("import_jmp_lbl"); + } + else if (cpy_jump_label.find("__import") != std::string::npos) + { + if (name == "sta") + { + detail::print_error("__import is not allowed on a sta operation.", file.c_str()); + throw std::runtime_error("import_sta_op"); + } + + cpy_jump_label.erase(cpy_jump_label.find("__import"), strlen("__import")); + } + + while (cpy_jump_label.find(' ') != std::string::npos) + { + cpy_jump_label.erase(cpy_jump_label.find(' '), 1); + } + + auto mld_reloc_str = std::to_string(cpy_jump_label.size()); + mld_reloc_str += kRelocSymbol; + mld_reloc_str += cpy_jump_label; + + bool ignore_back_slash = false; + + for (auto& reloc_chr : mld_reloc_str) + { + if (reloc_chr == '\\') + { + ignore_back_slash = true; + continue; + } + + if (ignore_back_slash) + { + ignore_back_slash = false; + continue; + } + + kBytes.push_back(reloc_chr); + } + } + + kBytes.push_back('\0'); + } + } + +} \ No newline at end of file diff --git a/CompilerDriver/masm.cxx b/CompilerDriver/masm.cxx deleted file mode 100644 index e3be378..0000000 --- a/CompilerDriver/masm.cxx +++ /dev/null @@ -1,950 +0,0 @@ -/* - * ======================================================== - * - * C++Kit - * Copyright Western Company, all rights reserved. - * - * ======================================================== - */ - -///////////////////////////////////////////////////////////////////////////////////////// - -// @file masm.cxx -// @brief MP-UX 64x0 Assembler. - -// REMINDER: when dealing with an undefined symbol use (string size):ld:(string) -// so that ld will look for it. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include -#include -#include -#include -#include - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" -#define kYellow "\e[0;33m" - -#define kStdOut (std::cout << kWhite) - -static char kOutputArch = CxxKit::kPefArch64000; - -//! base relocation address for every mp-ux app. -static UInt32 kErrorLimit = 10; -static UInt32 kAcceptableErrors = 0; - -static std::size_t kCounter = 1UL; - -static bool kVerbose = false; - -static std::vector kBytes; -static CxxKit::AERecordHeader kCurrentRecord{ .fName = "", .fKind = CxxKit::kPefCode, .fSize = 0, .fOffset = 0 }; - -static std::vector kRecords; -static std::vector kUndefinedSymbols; - -static const std::string kUndefinedSymbol = ":ld:"; -static const std::string kRelocSymbol = ":mld:"; - -// \brief forward decl. -static std::string masm_check_line(std::string& line, const std::string& file); -static bool masm_read_attributes(std::string& line); -static void masm_read_instruction(std::string& line, const std::string& file); - -namespace detail -{ - void print_error(std::string reason, const std::string& file) noexcept - { - if (reason[0] == '\n') - reason.erase(0, 1); - - kStdOut << kRed << "[ masm ] " << kWhite << ((file == "masm") ? 
"internal assembler error " : ("in file, " + file)) << kBlank << std::endl; - kStdOut << kRed << "[ masm ] " << kWhite << reason << kBlank << std::endl; - - if (kAcceptableErrors > kErrorLimit) - std::exit(3); - - ++kAcceptableErrors; - } - - void print_warning(std::string reason, const std::string& file) noexcept - { - if (reason[0] == '\n') - reason.erase(0, 1); - - if (!file.empty()) - { - kStdOut << kYellow << "[ file ] " << kWhite << file << kBlank << std::endl; - } - - kStdOut << kYellow << "[ masm ] " << kWhite << reason << kBlank << std::endl; - } -} - -// provide operator<< for AE - -std::ofstream& operator<<(std::ofstream& fp, CxxKit::AEHeader& container) -{ - fp.write((char*)&container, sizeof(CxxKit::AEHeader)); - - return fp; -} - -std::ofstream& operator<<(std::ofstream& fp, CxxKit::AERecordHeader& container) -{ - fp.write((char*)&container, sizeof(CxxKit::AERecordHeader)); - - return fp; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Main entrypoint. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -int main(int argc, char** argv) -{ - for (size_t i = 1; i < argc; ++i) - { - if (argv[i][0] == '-') - { - if (strcmp(argv[i], "-v") == 0) - { - kStdOut << "masm: The MP-UX Assembler.\nmasm: Copyright (c) 2023 Western Company.\n"; - return 0; - } - - if (strcmp(argv[i], "-h") == 0) - { - kStdOut << "masm: The MP-UX Assembler.\nmasm: Copyright (c) 2023 Western Company.\n"; - kStdOut << "-v: Print program version.\n"; - kStdOut << "-verbose: Print verbose output.\n"; - kStdOut << "-m64000: Compile for the X64000 instruction set.\n"; - - return 0; - } - else if (strcmp(argv[i], "-m64000") == 0) - { - kOutputArch = CxxKit::kPefArch64000; - continue; - } - else if (strcmp(argv[i], "-verbose") == 0) - { - kVerbose = true; - continue; - } - - kStdOut << "masm: ignore " << argv[i] << "\n"; - continue; - } - - if (!std::filesystem::exists(argv[i])) - continue; - - std::string object_output(argv[i]); - - if (object_output.find(kAsmFileExt64x0) != std::string::npos) - { - object_output.erase(object_output.find(kAsmFileExt64x0), std::size(kAsmFileExt64x0)); - } - - object_output += kObjectFileExt; - - std::ifstream file_ptr(argv[i]); - std::ofstream file_ptr_out(object_output, - std::ofstream::binary); - - if (file_ptr_out.bad()) - { - if (kVerbose) - { - kStdOut << "masm: error: " << strerror(errno) << "\n"; - } - } - - std::string line; - - CxxKit::AEHeader hdr{ 0 }; - - memset(hdr.fPad, kAEInvalidOpcode, kAEPad); - - hdr.fMagic[0] = kAEMag0; - hdr.fMagic[1] = kAEMag1; - hdr.fSize = sizeof(CxxKit::AEHeader); - hdr.fArch = kOutputArch; - - ///////////////////////////////////////////////////////////////////////////////////////// - - // COMPILATION LOOP - - ///////////////////////////////////////////////////////////////////////////////////////// - - while (std::getline(file_ptr, line)) - { - if (auto ln = masm_check_line(line, argv[i]); - !ln.empty()) - { - detail::print_error(ln, 
argv[i]); - continue; - } - - try - { - masm_read_attributes(line); - masm_read_instruction(line, argv[i]); - } - catch(const std::exception& e) - { - if (kVerbose) - { - std::string what = e.what(); - detail::print_warning("exit because of: " + what, "masm"); - } - - std::filesystem::remove(object_output); - goto masm_fail_exit; - } - - } - - if (kVerbose) - kStdOut << "masm: writing to file...\n"; - - // this is the final step, write everything to the file. - - auto pos = file_ptr_out.tellp(); - - hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); - - file_ptr_out << hdr; - - if (kRecords.empty()) - { - std::filesystem::remove(object_output); - return -1; - } - - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - std::size_t record_count = 0UL; - - for (auto& rec : kRecords) - { - if (kVerbose) - kStdOut << "masm: wrote record " << rec.fName << " to file...\n"; - - rec.fFlags |= CxxKit::kKindRelocationAtRuntime; - rec.fOffset = record_count; - ++record_count; - - file_ptr_out << rec; - } - - // increment once again, so that we won't lie about the kUndefinedSymbols. - ++record_count; - - for (auto& sym : kUndefinedSymbols) - { - CxxKit::AERecordHeader _record_hdr{ 0 }; - - if (kVerbose) - kStdOut << "masm: wrote symbol " << sym << " to file...\n"; - - _record_hdr.fKind = kAEInvalidOpcode; - _record_hdr.fSize = sym.size(); - _record_hdr.fOffset = record_count; - - ++record_count; - - memset(_record_hdr.fPad, kAEInvalidOpcode, kAEPad); - memcpy(_record_hdr.fName, sym.c_str(), sym.size()); - - file_ptr_out << _record_hdr; - - ++kCounter; - } - - auto pos_end = file_ptr_out.tellp(); - - file_ptr_out.seekp(pos); - - hdr.fStartCode = pos_end; - hdr.fCodeSize = kBytes.size(); - - file_ptr_out << hdr; - - file_ptr_out.seekp(pos_end); - - // byte from byte, we write this. 
- for (auto& byte : kBytes) - { - file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); - } - - if (kVerbose) - kStdOut << "masm: wrote program bytes to file...\n"; - - file_ptr_out.flush(); - file_ptr_out.close(); - - if (kVerbose) - kStdOut << "masm: exit succeeded with code 0.\n"; - - return 0; - } - -masm_fail_exit: - - if (kVerbose) - kStdOut << "masm: exit failed with code -1.\n"; - - return -1; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for attributes -// returns true if any was found. - -///////////////////////////////////////////////////////////////////////////////////////// - -static bool masm_read_attributes(std::string& line) -{ - // __import is the opposite of export, it signals to the ld - // that we need this symbol. - if (ParserKit::find_word(line, "__import")) - { - auto name = line.substr(line.find("__import") + strlen("__import")); - - std::string result = std::to_string(name.size()); - result += kUndefinedSymbol; - - // mangle this - for (char & j : name) - { - if (j == ' ' || - j == ',') - j = '$'; - - } - - result += name; - - if (name.find(".text") != std::string::npos) - { - // data is treated as code. - kCurrentRecord.fKind = CxxKit::kPefCode; - } - else if (name.find(".data") != std::string::npos) - { - // no code will be executed from here. - kCurrentRecord.fKind = CxxKit::kPefData; - } - else if (name.find(".page_zero") != std::string::npos) - { - // this is a bss section. - kCurrentRecord.fKind = CxxKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that ld can find it. - - if (name == "__start") - { - kCurrentRecord.fKind = CxxKit::kPefCode; - } - - // now we can tell the code size of the previous kCurrentRecord. 
- - if (!kRecords.empty()) - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, result.c_str(), result.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAEInvalidOpcode, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - // __export is a special keyword used by masm to tell the AE output stage to mark this section as a header. - // it currently supports .text, .data., page_zero - if (ParserKit::find_word(line, "__export")) - { - auto name = line.substr(line.find("__export") + strlen("__export")); - - for (char& j : name) - { - if (j == ' ') - j = '$'; - - } - - if (name.find(',') != std::string::npos) - name.erase(name.find(',')); - - if (name.find(".text") != std::string::npos) - { - // data is treated as code. - kCurrentRecord.fKind = CxxKit::kPefCode; - } - else if (name.find(".data") != std::string::npos) - { - // no code will be executed from here. - kCurrentRecord.fKind = CxxKit::kPefData; - } - else if (name.find(".page_zero") != std::string::npos) - { - // this is a bss section. - kCurrentRecord.fKind = CxxKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that ld can find it. - - if (name == "__start") - { - kCurrentRecord.fKind = CxxKit::kPefCode; - } - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, name.c_str(), name.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAEInvalidOpcode, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - return false; -} - -// \brief algorithms and helpers. - -namespace detail::algorithm -{ - // \brief authorize a brief set of characters. 
- static inline bool is_not_alnum_space(char c) - { - return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || - (c == '(') || (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') - || (c == '+') || (c == '_')); - } - - bool is_valid(const std::string &str) - { - if (ParserKit::find_word(str, "__export") || - ParserKit::find_word(str, "__import")) - return true; - - return find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); - } -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for line (syntax check) - -///////////////////////////////////////////////////////////////////////////////////////// - -static std::string masm_check_line(std::string& line, const std::string& file) -{ - (void)file; - - std::string err_str; - - while (line.find('\t') != std::string::npos) - line.erase(line.find('\t'), 1); - - if (line.empty() || - ParserKit::find_word(line, "__import") || - ParserKit::find_word(line, "__export") || - ParserKit::find_word(line, "#") || - ParserKit::find_word(line, ";") || - ParserKit::find_word(line, "layout")) - { - if (line.find('#') != std::string::npos) - { - line.erase(line.find('#')); - } - - if (line.find(';') != std::string::npos) - { - line.erase(line.find(';')); - } - - return err_str; - } - - if (!detail::algorithm::is_valid(line)) - { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - - return err_str; - } - - // check for a valid instruction format. 
- - if (line.find(',') != std::string::npos) - { - if (line.find(',') + 1 == line.size()) - { - err_str += "\ninstruction lacks right register, here -> "; - err_str += line.substr(line.find(',')); - - return err_str; - } - else - { - bool nothing_on_right = true; - - if (line.find(',') + 1 > line.size()) - { - err_str += "\ninstruction not complete, here -> "; - err_str += line; - - return err_str; - } - - auto substr = line.substr(line.find(',') + 1); - - for (auto& ch : substr) - { - if (ch != ' ' && - ch != '\t') - { - nothing_on_right = false; - } - } - - // this means we found nothing after that ',' . - if (nothing_on_right) - { - err_str += "\ninstruction not complete, here -> "; - err_str += line; - - return err_str; - } - } - } - - std::vector opcodes_list = { "jb", "psh", "stw", "ldw", "lda", "sta" }; - - for (auto& opcodes : kOpcodesStd) - { - if (line.find(opcodes.fName) != std::string::npos) - { - for (auto& op : opcodes_list) - { - if (line == op || - line.find(op) != std::string::npos && - !isspace(line[line.find(op) + op.size()])) - { - err_str += "\nmalformed "; - err_str += op; - err_str += " instruction, here -> "; - err_str += line; - } - } - - return err_str; - } - } - - err_str += "Unknown syntax: "; - err_str += line; - - return err_str; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief internal namespace - -///////////////////////////////////////////////////////////////////////////////////////// - -namespace detail -{ - union number_cast - { - explicit number_cast(UInt64 raw) - : raw(raw) - {} - - char number[8]; - UInt64 raw; - }; -} - -static bool masm_write_number(std::size_t pos, std::string& jump_label) -{ - if (!isdigit(jump_label[pos])) - return false; - - switch (jump_label[pos+1]) - { - case 'x': - { - if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 16); - !res) - { - if (errno != 0) - { - detail::print_error("invalid hex number: " + jump_label, "masm"); 
- throw std::runtime_error("invalid_hex"); - - return false; - } - } - - detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 16)); - - for (char& i : num.number) - { - kBytes.push_back(i); - } - - if (kVerbose) - { - kStdOut << "masm: found a base 16 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; - } - case 'b': - { - if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 2); - !res) - { - if (errno != 0) - { - detail::print_error("invalid binary number: " + jump_label, "masm"); - throw std::runtime_error("invalid_bin"); - - return false; - } - } - - detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 2)); - - if (kVerbose) - { - kStdOut << "masm: found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) - { - kBytes.push_back(i); - } - - return true; - } - case 'o': - { - if (auto res = strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 7); - !res) - { - if (errno != 0) - { - detail::print_error("invalid octal number: " + jump_label, "masm"); - throw std::runtime_error("invalid_octal"); - - return false; - } - } - - detail::number_cast num(strtoq(jump_label.substr(pos + 2).c_str(), - nullptr, 7)); - - if (kVerbose) - { - kStdOut << "masm: found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) - { - kBytes.push_back(i); - } - - return true; - } - default: - { - break; - } - } - - /* check for errno and stuff like that */ - if (auto res = strtoq(jump_label.substr(pos).c_str(), - nullptr, 10); - !res) - { - if (errno != 0) - { - return false; - } - } - - detail::number_cast num(strtoq(jump_label.substr(pos).c_str(), - nullptr, 10)); - - for (char& i : num.number) - { - kBytes.push_back(i); - } - - if (kVerbose) - { - kStdOut << "masm: found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - 
-///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Read and write instruction to kBytes array. - -///////////////////////////////////////////////////////////////////////////////////////// - -static void masm_read_instruction(std::string& line, const std::string& file) -{ - for (auto& opcodes : kOpcodesStd) - { - if (ParserKit::find_word(line, opcodes.fName)) - { - std::string name(opcodes.fName); - std::string jump_label, cpy_jump_label; - - kBytes.emplace_back(opcodes.fOpcode); - kBytes.emplace_back(opcodes.fFunct3); - kBytes.emplace_back(opcodes.fFunct7); - - // check funct7 - switch (opcodes.fFunct7) - { - // reg to reg means register to register transfer operation. - case kAsmRegToReg: - case kAsmImmediate: - { - // \brief how many registers we found. - std::size_t found_some = 0UL; - - for (size_t line_index = 0UL; line_index < line.size(); line_index++) - { - if (line[line_index] == 'r' && - isdigit(line[line_index + 1])) - { - std::string register_syntax = kAsmRegisterPrefix; - register_syntax += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) - register_syntax += line[line_index + 2]; - - std::string reg_str; - reg_str += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) - reg_str += line[line_index + 2]; - - std::size_t reg_index = strtoq( - reg_str.c_str(), - nullptr, - 10); - - if (reg_index > kAsmRegisterLimit) - { - detail::print_error("invalid register index, r" + reg_str, file); - throw std::runtime_error("invalid_register_index"); - } - - kBytes.emplace_back(reg_index); - ++found_some; - - if (kVerbose) - { - kStdOut << "masm: Found register: " << register_syntax << "\n"; - kStdOut << "masm: Register count: " << found_some << "\n"; - } - } - } - - // we're not in immediate addressing, reg to reg. - if (opcodes.fFunct7 != kAsmImmediate) - { - // remember! register to register! 
- if (found_some == 1) - { - detail::print_error("unrecognized register found.\ntip: each masm register starts with 'r'.\nline: " + line, file); - throw std::runtime_error("not_a_register"); - } - } - - if (found_some < 1 && - name != "psh" && - name != "ldw" && - name != "lda" && - name != "stw") - { - detail::print_error("invalid combination of opcode and registers.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_reg"); - } - - if (found_some > 0 && - name == "pop") - { - detail::print_error("invalid combination for opcode 'pop'.\ntip: it expects nothing.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_pop"); - } - } - default: - break; - - } - - // try to fetch a number from the name - if (name == "psh" || - name == "jb" || - name == "stw" || - name == "ldw" || - name == "lda" || - name == "sta") - { - auto where_string = name; - - if (name == "stw" || - name == "ldw" || - name == "lda") - where_string = ","; - - jump_label = line.substr(line.find(where_string) + where_string.size()); - cpy_jump_label = jump_label; - - // replace any spaces with $ - if (jump_label[0] == ' ') - { - while (jump_label.find(' ') != std::string::npos) - { - if (isalnum(jump_label[0]) || - isdigit(jump_label[0])) - break; - - jump_label.erase(jump_label.find(' '), 1); - } - } - - if (!masm_write_number(0, jump_label)) - { - // sta expects this: sta 0x000000, r0 - if (name == "sta") - { - detail::print_error("invalid combination of opcode and operands.\nhere ->" + line, file); - throw std::runtime_error("invalid_comb_op_ops"); - - break; - } - - goto masm_write_label; - } - else - { - if (name == "sta" && - cpy_jump_label.find("__import") != std::string::npos) - { - detail::print_error("invalid usage __import on 'sta', here: " + line, file); - throw std::runtime_error("invalid_sta_usage"); - break; - } - } - } - - // if jump to branch - if (name == "jb") - { -masm_write_label: - if (cpy_jump_label.find('\n') != std::string::npos) - 
cpy_jump_label.erase(cpy_jump_label.find('\n'), 1); - - if (cpy_jump_label.find("__import") == std::string::npos && - name == "psh" || - cpy_jump_label.find("__import") == std::string::npos && - name == "jb") - { - detail::print_error("__import not found on jump label, please add one.", file.c_str()); - throw std::runtime_error("import_jmp_lbl"); - } - else if (cpy_jump_label.find("__import") != std::string::npos) - { - if (name == "sta") - { - detail::print_error("__import is not allowed on a sta operation.", file.c_str()); - throw std::runtime_error("import_sta_op"); - } - - cpy_jump_label.erase(cpy_jump_label.find("__import"), strlen("__import")); - } - - while (cpy_jump_label.find(' ') != std::string::npos) - { - cpy_jump_label.erase(cpy_jump_label.find(' '), 1); - } - - auto mld_reloc_str = std::to_string(cpy_jump_label.size()); - mld_reloc_str += kRelocSymbol; - mld_reloc_str += cpy_jump_label; - - bool ignore_back_slash = false; - - for (auto& reloc_chr : mld_reloc_str) - { - if (reloc_chr == '\\') - { - ignore_back_slash = true; - continue; - } - - if (ignore_back_slash) - { - ignore_back_slash = false; - continue; - } - - kBytes.push_back(reloc_chr); - } - } - - kBytes.push_back('\0'); - } - } - -} \ No newline at end of file -- cgit v1.2.3