From 877de2b648ae76fa150f75269a23b8ec7476ca5f Mon Sep 17 00:00:00 2001 From: Amlal El Mahrouss Date: Sun, 22 Feb 2026 06:22:26 +0100 Subject: chore: .cc to .cpp conversion, define CK_POSIX in frontends, new driver system for Nectar/C++ frontend. Signed-off-by: Amlal El Mahrouss --- src/CommandLine/cppdrv.cc | 22 - src/CommandLine/cppdrv.cpp | 22 + src/CommandLine/cppdrv.json | 2 +- src/CommandLine/dbg.cc | 17 - src/CommandLine/dbg.cpp | 17 + src/CommandLine/dbg.json | 2 +- src/CommandLine/kdbg.cc | 17 - src/CommandLine/kdbg.cpp | 17 + src/CommandLine/kdbg.json | 2 +- src/CommandLine/ld64-osx.json | 2 +- src/CommandLine/ld64-posix.json | 2 +- src/CommandLine/ld64.cc | 15 - src/CommandLine/ld64.cpp | 15 + src/CommandLine/mld64-osx.json | 2 +- src/CommandLine/mld64-posix.json | 2 +- src/CommandLine/mld64.cc | 15 - src/CommandLine/mld64.cpp | 15 + src/CommandLine/pef-amd64-asm.cc | 37 - src/CommandLine/pef-amd64-asm.cpp | 37 + src/CommandLine/pef-amd64-asm.json | 3 +- src/CommandLine/pef-amd64-drv.cpp | 37 + src/CommandLine/pef-amd64-drv.json | 21 + src/CommandLine/pef-amd64-necdrv.cc | 37 - src/CommandLine/pef-amd64-necdrv.json | 20 - src/CompilerKit/ck-osx-san.json | 6 +- src/CompilerKit/ck-osx.json | 4 +- src/CompilerKit/ck-posix-san.json | 31 + src/CompilerKit/ck-posix.json | 4 +- src/CompilerKit/src/Assemblers/Assembler+32x0.cc | 38 - src/CompilerKit/src/Assemblers/Assembler+32x0.cpp | 38 + src/CompilerKit/src/Assemblers/Assembler+64x0.cc | 875 --------- src/CompilerKit/src/Assemblers/Assembler+64x0.cpp | 875 +++++++++ src/CompilerKit/src/Assemblers/Assembler+AMD64.cc | 1848 -------------------- src/CompilerKit/src/Assemblers/Assembler+AMD64.cpp | 1848 ++++++++++++++++++++ src/CompilerKit/src/Assemblers/Assembler+ARM64.cc | 584 ------- src/CompilerKit/src/Assemblers/Assembler+ARM64.cpp | 584 +++++++ .../src/Assemblers/Assembler+PowerPC.cc | 904 ---------- .../src/Assemblers/Assembler+PowerPC.cpp | 904 ++++++++++ .../src/CodeGenerator+AssemblyFactory.cc | 45 - 
.../src/CodeGenerator+AssemblyFactory.cpp | 45 + .../src/Compilers/NectarCompiler+AMD64.cc | 1749 ------------------ .../src/Compilers/NectarCompiler+AMD64.cpp | 1749 ++++++++++++++++++ .../src/Linkers/DynamicLinker64+MachO.cc | 705 -------- .../src/Linkers/DynamicLinker64+MachO.cpp | 705 ++++++++ src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cc | 677 ------- .../src/Linkers/DynamicLinker64+PEF.cpp | 677 +++++++ .../src/Preprocessors/Preprocessor+Generic.cc | 917 ---------- .../src/Preprocessors/Preprocessor+Generic.cpp | 917 ++++++++++ src/DebuggerKit/dk-nekernel.json | 2 +- src/DebuggerKit/dk-osx.json | 2 +- src/DebuggerKit/src/NeKernelContract.cc | 89 - src/DebuggerKit/src/NeKernelContract.cpp | 89 + src/DebuggerKit/src/NeKernelContractCLI.cc | 99 -- src/DebuggerKit/src/NeKernelContractCLI.cpp | 99 ++ src/DebuggerKit/src/POSIXMachContractCLI.cc | 96 - src/DebuggerKit/src/POSIXMachContractCLI.cpp | 96 + 56 files changed, 8856 insertions(+), 8823 deletions(-) delete mode 100644 src/CommandLine/cppdrv.cc create mode 100644 src/CommandLine/cppdrv.cpp delete mode 100644 src/CommandLine/dbg.cc create mode 100644 src/CommandLine/dbg.cpp delete mode 100644 src/CommandLine/kdbg.cc create mode 100644 src/CommandLine/kdbg.cpp delete mode 100644 src/CommandLine/ld64.cc create mode 100644 src/CommandLine/ld64.cpp delete mode 100644 src/CommandLine/mld64.cc create mode 100644 src/CommandLine/mld64.cpp delete mode 100644 src/CommandLine/pef-amd64-asm.cc create mode 100644 src/CommandLine/pef-amd64-asm.cpp create mode 100644 src/CommandLine/pef-amd64-drv.cpp create mode 100644 src/CommandLine/pef-amd64-drv.json delete mode 100644 src/CommandLine/pef-amd64-necdrv.cc delete mode 100644 src/CommandLine/pef-amd64-necdrv.json create mode 100644 src/CompilerKit/ck-posix-san.json delete mode 100644 src/CompilerKit/src/Assemblers/Assembler+32x0.cc create mode 100644 src/CompilerKit/src/Assemblers/Assembler+32x0.cpp delete mode 100644 src/CompilerKit/src/Assemblers/Assembler+64x0.cc 
create mode 100644 src/CompilerKit/src/Assemblers/Assembler+64x0.cpp delete mode 100644 src/CompilerKit/src/Assemblers/Assembler+AMD64.cc create mode 100644 src/CompilerKit/src/Assemblers/Assembler+AMD64.cpp delete mode 100644 src/CompilerKit/src/Assemblers/Assembler+ARM64.cc create mode 100644 src/CompilerKit/src/Assemblers/Assembler+ARM64.cpp delete mode 100644 src/CompilerKit/src/Assemblers/Assembler+PowerPC.cc create mode 100644 src/CompilerKit/src/Assemblers/Assembler+PowerPC.cpp delete mode 100644 src/CompilerKit/src/CodeGenerator+AssemblyFactory.cc create mode 100644 src/CompilerKit/src/CodeGenerator+AssemblyFactory.cpp delete mode 100644 src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cc create mode 100644 src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cpp delete mode 100644 src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cc create mode 100644 src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cpp delete mode 100644 src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cc create mode 100644 src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cpp delete mode 100644 src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cc create mode 100644 src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cpp delete mode 100644 src/DebuggerKit/src/NeKernelContract.cc create mode 100644 src/DebuggerKit/src/NeKernelContract.cpp delete mode 100644 src/DebuggerKit/src/NeKernelContractCLI.cc create mode 100644 src/DebuggerKit/src/NeKernelContractCLI.cpp delete mode 100644 src/DebuggerKit/src/POSIXMachContractCLI.cc create mode 100644 src/DebuggerKit/src/POSIXMachContractCLI.cpp diff --git a/src/CommandLine/cppdrv.cc b/src/CommandLine/cppdrv.cc deleted file mode 100644 index 6977923..0000000 --- a/src/CommandLine/cppdrv.cc +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official 
repository: https://github.com/nekernel-org/nectar - -/// @file cppdrv.cc -/// @brief Nectar frontend preprocessor. - -#include -#include - -CK_IMPORT_C int GenericPreprocessorMain(int argc, char const* argv[]); - -int main(int argc, char const* argv[]) { - if (auto code = GenericPreprocessorMain(argc, argv); code > 0) { - std::printf("cppdrv: preprocessor exited with code %i.\n", code); - - return NECTAR_EXEC_ERROR; - } - - return NECTAR_SUCCESS; -} diff --git a/src/CommandLine/cppdrv.cpp b/src/CommandLine/cppdrv.cpp new file mode 100644 index 0000000..6977923 --- /dev/null +++ b/src/CommandLine/cppdrv.cpp @@ -0,0 +1,22 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// @file cppdrv.cc +/// @brief Nectar frontend preprocessor. + +#include +#include + +CK_IMPORT_C int GenericPreprocessorMain(int argc, char const* argv[]); + +int main(int argc, char const* argv[]) { + if (auto code = GenericPreprocessorMain(argc, argv); code > 0) { + std::printf("cppdrv: preprocessor exited with code %i.\n", code); + + return NECTAR_EXEC_ERROR; + } + + return NECTAR_SUCCESS; +} diff --git a/src/CommandLine/cppdrv.json b/src/CommandLine/cppdrv.json index 3fa222e..7c8cbe2 100644 --- a/src/CommandLine/cppdrv.json +++ b/src/CommandLine/cppdrv.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["cppdrv.cc"], + "sources_path": ["cppdrv.cpp"], "output_name": "cppdrv", "compiler_flags": ["-L/usr/local/lib", "-lCompilerKit"], "cpp_macros": [ diff --git a/src/CommandLine/dbg.cc b/src/CommandLine/dbg.cc deleted file mode 100644 index 5b08a69..0000000 --- a/src/CommandLine/dbg.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 
2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#include - -/// @file dbg.cc -/// @brief Nectar debugger. - -CK_IMPORT_C Int32 DebuggerMachPOSIX(Int32 argc, Char const* argv[]); - -/// @brief Debugger entrypoint. -/// @return Status code of debugger. -Int32 main(Int32 argc, Char const* argv[]) { - return DebuggerMachPOSIX(argc, argv); -} diff --git a/src/CommandLine/dbg.cpp b/src/CommandLine/dbg.cpp new file mode 100644 index 0000000..5b08a69 --- /dev/null +++ b/src/CommandLine/dbg.cpp @@ -0,0 +1,17 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#include + +/// @file dbg.cc +/// @brief Nectar debugger. + +CK_IMPORT_C Int32 DebuggerMachPOSIX(Int32 argc, Char const* argv[]); + +/// @brief Debugger entrypoint. +/// @return Status code of debugger. 
+Int32 main(Int32 argc, Char const* argv[]) { + return DebuggerMachPOSIX(argc, argv); +} diff --git a/src/CommandLine/dbg.json b/src/CommandLine/dbg.json index 16fe3e2..ecc63e8 100644 --- a/src/CommandLine/dbg.json +++ b/src/CommandLine/dbg.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include", "../../include/CompilerKit/src/Detail"], - "sources_path": ["dbg.cc"], + "sources_path": ["dbg.cpp"], "output_name": "dbg", "compiler_flags": ["-L/usr/lib", "-lDebuggerKit"], "cpp_macros": [ diff --git a/src/CommandLine/kdbg.cc b/src/CommandLine/kdbg.cc deleted file mode 100644 index 148efb3..0000000 --- a/src/CommandLine/kdbg.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#include - -/// @file kdbg.cc -/// @brief NeKernel debugger. - -CK_IMPORT_C Int32 DebuggerNeKernel(Int32 argc, Char const* argv[]); - -/// @brief Debugger entrypoint. -/// @return Status code of debugger. -Int32 main(Int32 argc, Char const* argv[]) { - return DebuggerNeKernel(argc, argv); -} diff --git a/src/CommandLine/kdbg.cpp b/src/CommandLine/kdbg.cpp new file mode 100644 index 0000000..148efb3 --- /dev/null +++ b/src/CommandLine/kdbg.cpp @@ -0,0 +1,17 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#include + +/// @file kdbg.cc +/// @brief NeKernel debugger. + +CK_IMPORT_C Int32 DebuggerNeKernel(Int32 argc, Char const* argv[]); + +/// @brief Debugger entrypoint. +/// @return Status code of debugger. 
+Int32 main(Int32 argc, Char const* argv[]) { + return DebuggerNeKernel(argc, argv); +} diff --git a/src/CommandLine/kdbg.json b/src/CommandLine/kdbg.json index 8c36932..6099f04 100644 --- a/src/CommandLine/kdbg.json +++ b/src/CommandLine/kdbg.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["kdbg.cc"], + "sources_path": ["kdbg.cpp"], "output_name": "kdbg", "compiler_flags": ["-L/usr/lib", "-lDebuggerKit"], "cpp_macros": [ diff --git a/src/CommandLine/ld64-osx.json b/src/CommandLine/ld64-osx.json index 1a78748..ddd233b 100644 --- a/src/CommandLine/ld64-osx.json +++ b/src/CommandLine/ld64-osx.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["ld64.cc"], + "sources_path": ["ld64.cpp"], "output_name": "ld64", "compiler_flags": ["-L/usr/local/lib", "-lCompilerKit"], "cpp_macros": [ diff --git a/src/CommandLine/ld64-posix.json b/src/CommandLine/ld64-posix.json index d2e626b..0847b41 100644 --- a/src/CommandLine/ld64-posix.json +++ b/src/CommandLine/ld64-posix.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["ld64.cc"], + "sources_path": ["ld64.cpp"], "output_name": "ld64", "compiler_flags": ["-L/usr/lib", "-lCompilerKit"], "cpp_macros": [ diff --git a/src/CommandLine/ld64.cc b/src/CommandLine/ld64.cc deleted file mode 100644 index 6d8a247..0000000 --- a/src/CommandLine/ld64.cc +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: 
https://github.com/nekernel-org/nectar - -#include - -/// @file ld64.cc -/// @brief Nectar linker for AE objects. - -CK_IMPORT_C Int32 DynamicLinker64PEF(Int32 argc, Char const* argv[]); - -Int32 main(Int32 argc, Char const* argv[]) { - return DynamicLinker64PEF(argc, argv); -} diff --git a/src/CommandLine/ld64.cpp b/src/CommandLine/ld64.cpp new file mode 100644 index 0000000..6d8a247 --- /dev/null +++ b/src/CommandLine/ld64.cpp @@ -0,0 +1,15 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#include + +/// @file ld64.cc +/// @brief Nectar linker for AE objects. + +CK_IMPORT_C Int32 DynamicLinker64PEF(Int32 argc, Char const* argv[]); + +Int32 main(Int32 argc, Char const* argv[]) { + return DynamicLinker64PEF(argc, argv); +} diff --git a/src/CommandLine/mld64-osx.json b/src/CommandLine/mld64-osx.json index 78b7d3a..57f08f0 100644 --- a/src/CommandLine/mld64-osx.json +++ b/src/CommandLine/mld64-osx.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["mld64.cc"], + "sources_path": ["mld64.cpp"], "output_name": "mld64", "compiler_flags": ["-L/usr/local/lib", "-lCompilerKit"], "cpp_macros": [ diff --git a/src/CommandLine/mld64-posix.json b/src/CommandLine/mld64-posix.json index 0b8deab..e408cec 100644 --- a/src/CommandLine/mld64-posix.json +++ b/src/CommandLine/mld64-posix.json @@ -2,7 +2,7 @@ "compiler_path": "clang++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include/", "../../include/CompilerKit/src/Detail"], - "sources_path": ["mld64.cc"], + "sources_path": ["mld64.cpp"], "output_name": "mld64", "compiler_flags": ["-L/usr/lib", "-lCompilerKit"], "cpp_macros": [ diff 
--git a/src/CommandLine/mld64.cc b/src/CommandLine/mld64.cc deleted file mode 100644 index f19712b..0000000 --- a/src/CommandLine/mld64.cc +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#include - -/// @file ld64.cc -/// @brief Nectar linker for AE objects. - -CK_IMPORT_C Int32 DynamicLinker64MachO(Int32 argc, Char const* argv[]); - -Int32 main(Int32 argc, Char const* argv[]) { - return DynamicLinker64MachO(argc, argv); -} diff --git a/src/CommandLine/mld64.cpp b/src/CommandLine/mld64.cpp new file mode 100644 index 0000000..f19712b --- /dev/null +++ b/src/CommandLine/mld64.cpp @@ -0,0 +1,15 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#include + +/// @file ld64.cc +/// @brief Nectar linker for AE objects. + +CK_IMPORT_C Int32 DynamicLinker64MachO(Int32 argc, Char const* argv[]); + +Int32 main(Int32 argc, Char const* argv[]) { + return DynamicLinker64MachO(argc, argv); +} diff --git a/src/CommandLine/pef-amd64-asm.cc b/src/CommandLine/pef-amd64-asm.cc deleted file mode 100644 index 3ca4639..0000000 --- a/src/CommandLine/pef-amd64-asm.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// @file pef-amd64-asm.cc -/// @brief Nectar C++ frontend compiler for AMD64. 
- -#include -#include -#include -#include - -#ifdef __APPLE__ -static auto kPath = "/usr/local/lib/libCompilerKit.dylib"; -#else -static auto kPath = "/usr/lib/libCompilerKit.so"; -#endif - -static auto kSymbol = "AssemblerMainAMD64"; - -Int32 main(Int32 argc, Char const* argv[]) { - CompilerKit::ModuleLoader dylib; - dylib(kPath, kSymbol); - - CompilerKit::ModuleLoader::EntryT entrypoint_cxx = - reinterpret_cast(dylib.fEntrypoint); - - if (!entrypoint_cxx) { - kStdOut; - std::printf("error: Could not find entrypoint in %s: %s\n", kPath, dlerror()); - - return EXIT_FAILURE; - } - - return (entrypoint_cxx(argc, argv) == NECTAR_SUCCESS) ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/src/CommandLine/pef-amd64-asm.cpp b/src/CommandLine/pef-amd64-asm.cpp new file mode 100644 index 0000000..3ca4639 --- /dev/null +++ b/src/CommandLine/pef-amd64-asm.cpp @@ -0,0 +1,37 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// @file pef-amd64-asm.cc +/// @brief Nectar C++ frontend compiler for AMD64. + +#include +#include +#include +#include + +#ifdef __APPLE__ +static auto kPath = "/usr/local/lib/libCompilerKit.dylib"; +#else +static auto kPath = "/usr/lib/libCompilerKit.so"; +#endif + +static auto kSymbol = "AssemblerMainAMD64"; + +Int32 main(Int32 argc, Char const* argv[]) { + CompilerKit::ModuleLoader dylib; + dylib(kPath, kSymbol); + + CompilerKit::ModuleLoader::EntryT entrypoint_cxx = + reinterpret_cast(dylib.fEntrypoint); + + if (!entrypoint_cxx) { + kStdOut; + std::printf("error: Could not find entrypoint in %s: %s\n", kPath, dlerror()); + + return EXIT_FAILURE; + } + + return (entrypoint_cxx(argc, argv) == NECTAR_SUCCESS) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/CommandLine/pef-amd64-asm.json b/src/CommandLine/pef-amd64-asm.json index 060fbf3..a1b1e1f 100644 --- a/src/CommandLine/pef-amd64-asm.json +++ b/src/CommandLine/pef-amd64-asm.json @@ -2,11 +2,12 @@ "compiler_path": "g++", "compiler_std": "c++20", "headers_path": ["../../include/CompilerKit", "../../include", "../../include/CompilerKit/src/Detail"], - "sources_path": ["pef-amd64-asm.cc"], + "sources_path": ["pef-amd64-asm.cpp"], "output_name": "pef-amd64-asm", "compiler_flags": ["-L/usr/lib"], "cpp_macros": [ "__DRV_ASM__=202601", + "CK_POSIX", "kDistReleaseBranch=$(git rev-parse --abbrev-ref HEAD)-$(uuidgen)" ] } diff --git a/src/CommandLine/pef-amd64-drv.cpp b/src/CommandLine/pef-amd64-drv.cpp new file mode 100644 index 0000000..c8d8dbe --- /dev/null +++ b/src/CommandLine/pef-amd64-drv.cpp @@ -0,0 +1,37 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// @file pef-amd64-cxxdrv.cc +/// @brief Nectar C++ frontend compiler for AMD64. + +#include +#include +#include +#include + +#ifdef __APPLE__ +static auto kPath = "/usr/local/lib/libCompilerKit.dylib"; +#else +static auto kPath = "/usr/lib/libCompilerKit.so"; +#endif + +static auto kSymbol = "CompilerNectarAMD64"; + +Int32 main(Int32 argc, Char const* argv[]) { + CompilerKit::ModuleLoader dylib; + dylib(kPath, kSymbol); + + CompilerKit::ModuleLoader::EntryT entrypoint_cxx = + reinterpret_cast(dylib.fEntrypoint); + + if (!entrypoint_cxx) { + kStdOut; + std::printf("error: Could not find entrypoint in %s: %s\n", kPath, dlerror()); + + return EXIT_FAILURE; + } + + return (entrypoint_cxx(argc, argv) == NECTAR_SUCCESS) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/CommandLine/pef-amd64-drv.json b/src/CommandLine/pef-amd64-drv.json new file mode 100644 index 0000000..69609ff --- /dev/null +++ b/src/CommandLine/pef-amd64-drv.json @@ -0,0 +1,21 @@ +{ + "compiler_path": "clang++", + "compiler_std": "c++20", + "headers_path": [ + "../include/CompilerKit", + "../../include", + "../include/CompilerKit/src/Detail" + ], + "sources_path": [ + "pef-amd64-drv.cpp" + ], + "output_name": "pef-amd64-necdrv", + "compiler_flags": [ + ], + "cpp_macros": [ + "__NECDRV__=202504", + "CK_POSIX", + "kDistReleaseBranch=$(git rev-parse --abbrev-ref HEAD)-$(uuidgen)" + ] +} + diff --git a/src/CommandLine/pef-amd64-necdrv.cc b/src/CommandLine/pef-amd64-necdrv.cc deleted file mode 100644 index c8d8dbe..0000000 --- a/src/CommandLine/pef-amd64-necdrv.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// @file pef-amd64-cxxdrv.cc -/// @brief Nectar C++ frontend compiler for AMD64. - -#include -#include -#include -#include - -#ifdef __APPLE__ -static auto kPath = "/usr/local/lib/libCompilerKit.dylib"; -#else -static auto kPath = "/usr/lib/libCompilerKit.so"; -#endif - -static auto kSymbol = "CompilerNectarAMD64"; - -Int32 main(Int32 argc, Char const* argv[]) { - CompilerKit::ModuleLoader dylib; - dylib(kPath, kSymbol); - - CompilerKit::ModuleLoader::EntryT entrypoint_cxx = - reinterpret_cast(dylib.fEntrypoint); - - if (!entrypoint_cxx) { - kStdOut; - std::printf("error: Could not find entrypoint in %s: %s\n", kPath, dlerror()); - - return EXIT_FAILURE; - } - - return (entrypoint_cxx(argc, argv) == NECTAR_SUCCESS) ? 
EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/src/CommandLine/pef-amd64-necdrv.json b/src/CommandLine/pef-amd64-necdrv.json deleted file mode 100644 index ea2209d..0000000 --- a/src/CommandLine/pef-amd64-necdrv.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "compiler_path": "clang++", - "compiler_std": "c++20", - "headers_path": [ - "../include/CompilerKit", - "../../include", - "../include/CompilerKit/src/Detail" - ], - "sources_path": [ - "pef-amd64-necdrv.cc" - ], - "output_name": "pef-amd64-necdrv", - "compiler_flags": [ - ], - "cpp_macros": [ - "__NECDRV__=202504", - "kDistReleaseBranch=$(git rev-parse --abbrev-ref HEAD)-$(uuidgen)" - ] -} - diff --git a/src/CompilerKit/ck-osx-san.json b/src/CompilerKit/ck-osx-san.json index e133ebf..5fe2ee6 100644 --- a/src/CompilerKit/ck-osx-san.json +++ b/src/CompilerKit/ck-osx-san.json @@ -9,10 +9,10 @@ "/opt/homebrew/Cellar/boost/1.90.0/include" ], "sources_path": [ - "src/*.cc", - "src/*/*.cc" + "src/*.cpp", + "src/*/*.cpp" ], - "output_name": "/usr/local/lib/libCompilerKit.dylib", + "output_name": "/usr/local/lib/libCompilerKit.san.dylib", "compiler_flags": [ "-fPIC", "-shared", diff --git a/src/CompilerKit/ck-osx.json b/src/CompilerKit/ck-osx.json index a4f711d..3aabdcd 100644 --- a/src/CompilerKit/ck-osx.json +++ b/src/CompilerKit/ck-osx.json @@ -9,8 +9,8 @@ "/opt/homebrew/Cellar/boost/1.90.0/include" ], "sources_path": [ - "src/*.cc", - "src/*/*.cc" + "src/*.cpp", + "src/*/*.cpp" ], "output_name": "/usr/local/lib/libCompilerKit.dylib", "compiler_flags": [ diff --git a/src/CompilerKit/ck-posix-san.json b/src/CompilerKit/ck-posix-san.json new file mode 100644 index 0000000..b974fb9 --- /dev/null +++ b/src/CompilerKit/ck-posix-san.json @@ -0,0 +1,31 @@ +{ + "compiler_path": "clang++", + "compiler_std": "c++20", + "headers_path": [ + "../../include/CompilerKit", + "../../include/", + "../../include/CompilerKit/src/", + "../../include/CompilerKit/src/impl", + "/opt/homebrew/Cellar/boost/1.90.0/include" + ], + "sources_path": [ + 
"src/*.cpp", + "src/*/*.cpp" + ], + "output_name": "/usr/lib/libCompilerKit.san.so", + "compiler_flags": [ + "-fPIC", + "-shared", + "-fstack-protector-all", + "-fno-omit-frame-pointer", + "-g", + "-fsanitize=address", + "-fsanitize=undefined" + ], + "cpp_macros": [ + "__NECTAR__=202505", + "CK_USE_STRUCTS=1", + "kDistReleaseBranch=$(git rev-parse --abbrev-ref HEAD)-$(uuidgen)" + ], + "description": "CompilerKit for OSX, with Asan/Ubsan. CK is the framework behind Nectar." +} diff --git a/src/CompilerKit/ck-posix.json b/src/CompilerKit/ck-posix.json index 9694662..553b22b 100644 --- a/src/CompilerKit/ck-posix.json +++ b/src/CompilerKit/ck-posix.json @@ -8,8 +8,8 @@ "../../include/CompilerKit/src/impl" ], "sources_path": [ - "src/*.cc", - "src/*/*.cc" + "src/*.cpp", + "src/*/*.cpp" ], "output_name": "/usr/lib/libCompilerKit.so", "compiler_flags": [ diff --git a/src/CompilerKit/src/Assemblers/Assembler+32x0.cc b/src/CompilerKit/src/Assemblers/Assembler+32x0.cc deleted file mode 100644 index 787c685..0000000 --- a/src/CompilerKit/src/Assemblers/Assembler+32x0.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// bugs: 0 - -///////////////////////////////////////////////////////////////////////////////////////// - -// @file Assembler+32x0.cc -// @author Amlal El Mahrouss -// @brief 32x0 Assembler. - -// REMINDER: when dealing with an undefined symbol use (string -// size):LinkerFindSymbol:(string) so that ld will look for it. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -#ifndef __ASM_NEED_32x0__ -#define __ASM_NEED_32x0__ -#endif - -#include -#include -#include -#include -#include - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief 32x0 Assembler entrypoint, the program/module starts here. - -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(NEAssemblerMain32000) { - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - return EXIT_SUCCESS; -} diff --git a/src/CompilerKit/src/Assemblers/Assembler+32x0.cpp b/src/CompilerKit/src/Assemblers/Assembler+32x0.cpp new file mode 100644 index 0000000..787c685 --- /dev/null +++ b/src/CompilerKit/src/Assemblers/Assembler+32x0.cpp @@ -0,0 +1,38 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// bugs: 0 + +///////////////////////////////////////////////////////////////////////////////////////// + +// @file Assembler+32x0.cc +// @author Amlal El Mahrouss +// @brief 32x0 Assembler. + +// REMINDER: when dealing with an undefined symbol use (string +// size):LinkerFindSymbol:(string) so that ld will look for it. + +///////////////////////////////////////////////////////////////////////////////////////// + +#ifndef __ASM_NEED_32x0__ +#define __ASM_NEED_32x0__ +#endif + +#include +#include +#include +#include +#include + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief 32x0 Assembler entrypoint, the program/module starts here. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(NEAssemblerMain32000) { + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + return EXIT_SUCCESS; +} diff --git a/src/CompilerKit/src/Assemblers/Assembler+64x0.cc b/src/CompilerKit/src/Assemblers/Assembler+64x0.cc deleted file mode 100644 index bbb8447..0000000 --- a/src/CompilerKit/src/Assemblers/Assembler+64x0.cc +++ /dev/null @@ -1,875 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// bugs: 0 - -///////////////////////////////////////////////////////////////////////////////////////// - -// @file Assembler+64x0.cc -// @author Amlal El Mahrouss -// @brief 64x000 Assembler. - -// REMINDER: when dealing with an undefined symbol use (string -// size):LinkerFindSymbol:(string) so that ld will look for it. - -///////////////////////////////////////////////////////////////////////////////////////// - -#ifndef __ASM_NEED_64x0__ -#define __ASM_NEED_64x0__ -#endif - -#include -#include -#include -#include -#include - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -static char kOutputArch = CompilerKit::kPefArch64000; - -constexpr auto k64x0IPAlignment = 0x1U; - -static std::size_t kCounter = 1UL; - -static std::uintptr_t kOrigin = kPefBaseOrigin; -static std::vector> kOriginLabel; - -static std::vector kBytes; - -static CompilerKit::AERecordHeader kCurrentRecord{ - .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; - -static std::vector kRecords; -static std::vector kUndefinedSymbols; - -static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; -static const std::string kRelocSymbol = ":RuntimeSymbol:"; - -// \brief forward decl. 
-static bool asm_read_attributes(std::string line); - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief 64x0 assembler entrypoint, the program/module starts here. - -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(AssemblerMain64x0) { - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - for (size_t i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - if (strcmp(argv[i], "--version") == 0 || strcmp(argv[i], "--v") == 0) { - kStdOut - << "Assembler64x0: 64x0 Assembler.\nAssembler64x0: v1.10\nAssembler64x0: Copyright (c) " - "Amlal El Mahrouss\n"; - return 0; - } else if (strcmp(argv[i], "--help") == 0) { - kStdOut << "Assembler64x0: 64x0 Assembler.\nAssembler64x0: Copyright (c) 2024 Mahrouss " - "Logic.\n"; - kStdOut << "--version: Print program version.\n"; - kStdOut << "--fverbose: Print verbose output.\n"; - kStdOut << "--fbinary: Output as flat binary.\n"; - kStdOut << "--64xxx: Compile for a subset of the X64000.\n"; - - return 0; - } else if (strcmp(argv[i], "--fbinary") == 0) { - kOutputAsBinary = true; - continue; - } else if (strcmp(argv[i], "--fverbose") == 0) { - kVerbose = true; - continue; - } - - kStdOut << "Assembler64x0: ignore " << argv[i] << "\n"; - continue; - } - - if (!std::filesystem::exists(argv[i])) { - kStdOut << "Assembler64x0: can't open: " << argv[i] << std::endl; - goto asm_fail_exit; - } - - std::string object_output(argv[i]); - - for (auto& ext : kAsmFileExts) { - if (object_output.find(ext) != std::string::npos) { - object_output.erase(object_output.find(ext), std::strlen(ext)); - } - } - - object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; - - std::ifstream file_ptr(argv[i]); - std::ofstream file_ptr_out(object_output, std::ofstream::binary); - - if (file_ptr_out.bad()) { - if (kVerbose) { - kStdOut << "Assembler64x0: error: " << strerror(errno) << "\n"; - } - } - - std::string line; - - CompilerKit::AEHeader hdr{0}; - - memset(hdr.fPad, kAENullType, kAEPad); - - hdr.fMagic[0] = kAEMag0; - hdr.fMagic[1] = kAEMag1; - hdr.fMagic[2] = kAEMag2; - hdr.fSize = sizeof(CompilerKit::AEHeader); - hdr.fArch = kOutputArch; - - ///////////////////////////////////////////////////////////////////////////////////////// - - // COMPILATION LOOP - - ///////////////////////////////////////////////////////////////////////////////////////// - - CompilerKit::Encoder64x0 asm64; - - while (std::getline(file_ptr, line)) { - if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { - CompilerKit::Detail::print_error(ln, argv[i]); - continue; - } - - try { - asm_read_attributes(line); - asm64.WriteLine(line, argv[i]); - } catch (const std::exception& e) { - if (kVerbose) { - std::string what = e.what(); - CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); - } - - std::filesystem::remove(object_output); - goto asm_fail_exit; - } - } - - if (!kOutputAsBinary) { - if (kVerbose) { - kStdOut << "Assembler64x0: Writing object file...\n"; - } - - // this is the final step, write everything to the file. 
- - auto pos = file_ptr_out.tellp(); - - hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); - - file_ptr_out << hdr; - - if (kRecords.empty()) { - kStdErr << "Assembler64x0: At least one record is needed to write an object " - "file.\nAssembler64x0: Make one using `public_segment .code64 foo_bar`.\n"; - - std::filesystem::remove(object_output); - return 1; - } - - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - std::size_t record_count = 0UL; - - for (auto& rec : kRecords) { - if (kVerbose) kStdOut << "Assembler64x0: Wrote record " << rec.fName << " to file...\n"; - - rec.fFlags |= CompilerKit::kKindRelocationAtRuntime; - rec.fOffset = record_count; - ++record_count; - - file_ptr_out << rec; - } - - // increment once again, so that we won't lie about the kUndefinedSymbols. - ++record_count; - - for (auto& sym : kUndefinedSymbols) { - CompilerKit::AERecordHeader _record_hdr{0}; - - if (kVerbose) kStdOut << "Assembler64x0: Wrote symbol " << sym << " to file...\n"; - - _record_hdr.fKind = kAENullType; - _record_hdr.fSize = sym.size(); - _record_hdr.fOffset = record_count; - - ++record_count; - - memset(_record_hdr.fPad, kAENullType, kAEPad); - memcpy(_record_hdr.fName, sym.c_str(), sym.size()); - - file_ptr_out << _record_hdr; - - ++kCounter; - } - - auto pos_end = file_ptr_out.tellp(); - - file_ptr_out.seekp(pos); - - hdr.fStartCode = pos_end; - hdr.fCodeSize = kBytes.size(); - - file_ptr_out << hdr; - - file_ptr_out.seekp(pos_end); - } else { - if (kVerbose) { - kStdOut << "Assembler64x0: Write raw binary...\n"; - } - } - - // byte from byte, we write this. 
- for (auto& byte : kBytes) { - file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); - } - - if (kVerbose) kStdOut << "Assembler64x0: Wrote file with program in it.\n"; - - file_ptr_out.flush(); - file_ptr_out.close(); - - if (kVerbose) kStdOut << "Assembler64x0: Exit succeeded.\n"; - - return 0; - } - -asm_fail_exit: - - if (kVerbose) kStdOut << "Assembler64x0: Exit failed.\n"; - - return 1; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for attributes -// returns true if any was found. - -///////////////////////////////////////////////////////////////////////////////////////// - -static bool asm_read_attributes(std::string line) { - // extern_segment is the opposite of public_segment, it signals to the ld - // that we need this symbol. - if (CompilerKit::ast_find_needle(line, "extern_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_extern_segment_bin"); - } - - auto name = line.substr(line.find("extern_segment") + strlen("extern_segment")); - - /// sanity check to avoid stupid linker errors. - if (name.size() == 0) { - CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); - throw std::runtime_error("invalid_extern_segment"); - } - - std::string result = std::to_string(name.size()); - result += kUndefinedSymbol; - - // mangle this - for (char& j : name) { - if (j == ' ' || j == ',') j = '$'; - } - - result += name; - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. - kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. 
- kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that ld can find it. - - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, result.c_str(), result.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - // public_segment is a special keyword used by Assembler64x0 to tell the AE output stage to - // mark this section as a header. it currently supports .code64, .data64., - // .zero64 - else if (CompilerKit::ast_find_needle(line, "public_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_public_segment_bin"); - } - - auto name = line.substr(line.find("public_segment") + strlen("public_segment")); - - std::string name_copy = name; - - for (char& j : name) { - if (j == ' ') j = '$'; - } - - if (name.find(".code64") != std::string::npos) { - // data is treated as code. - - name_copy.erase(name_copy.find(".code64"), strlen(".code64")); - kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(".data64") != std::string::npos) { - // no code will be executed from here. - - name_copy.erase(name_copy.find(".data64"), strlen(".data64")); - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(".zero64") != std::string::npos) { - // this is a bss section. - - name_copy.erase(name_copy.find(".zero64"), strlen(".zero64")); - kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that ld can find it. 
- - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); - - kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); - ++kOrigin; - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, name.c_str(), name.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - return false; -} - -// \brief algorithms and helpers. - -namespace CompilerKit::Detail::algorithm { -// \brief authorize a brief set of characters. -static inline bool is_not_alnum_space(char c) { - return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || - (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || - (c == '_') || (c == ':') || (c == '@') || (c == '.')); -} - -bool is_valid_64x0(std::string str) { - return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); -} -} // namespace CompilerKit::Detail::algorithm - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for line (syntax check) - -///////////////////////////////////////////////////////////////////////////////////////// - -std::string CompilerKit::Encoder64x0::CheckLine(std::string line, std::string file) { - std::string err_str; - - if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || - CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || - CompilerKit::ast_find_needle(line, ";")) { - if (line.find('#') != std::string::npos) { - line.erase(line.find('#')); - } else if (line.find(';') != std::string::npos) { - line.erase(line.find(';')); - } else { - // now 
check the line for validity - if (!CompilerKit::Detail::algorithm::is_valid_64x0(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - } - } - - return err_str; - } - - if (!CompilerKit::Detail::algorithm::is_valid_64x0(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - - return err_str; - } - - // check for a valid instruction format. - - if (line.find(',') != std::string::npos) { - if (line.find(',') + 1 == line.size()) { - err_str += "\nInstruction lacks right register, here -> "; - err_str += line.substr(line.find(',')); - - return err_str; - } else { - bool nothing_on_right = true; - - if (line.find(',') + 1 > line.size()) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - - auto substr = line.substr(line.find(',') + 1); - - for (auto& ch : substr) { - if (ch != ' ' && ch != '\t') { - nothing_on_right = false; - } - } - - // this means we found nothing after that ',' . - if (nothing_on_right) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - } - } - - // these do take an argument. - std::vector operands_inst = {"stw", "ldw", "lda", "sta"}; - - // these don't. - std::vector filter_inst = {"jlr", "jrl", "int"}; - - for (auto& opcode64x0 : kOpcodes64x0) { - if (line.find(opcode64x0.fName) != std::string::npos) { - if (opcode64x0.fFunct7 == kAsmNoArgs) return err_str; - - for (auto& op : operands_inst) { - // if only the instruction was found. 
- if (line == op) { - err_str += "\nMalformed "; - err_str += op; - err_str += " instruction, here -> "; - err_str += line; - } - } - - // if it is like that -> addr1, 0x0 - if (auto it = std::find(filter_inst.begin(), filter_inst.end(), opcode64x0.fName); - it == filter_inst.cend()) { - if (CompilerKit::ast_find_needle(line, opcode64x0.fName)) { - if (!isspace(line[line.find(opcode64x0.fName) + strlen(opcode64x0.fName)])) { - err_str += "\nMissing space between "; - err_str += opcode64x0.fName; - err_str += " and operands.\nhere -> "; - err_str += line; - } - } - } - - return err_str; - } - } - - err_str += "Unrecognized instruction: " + line; - - return err_str; -} - -bool CompilerKit::Encoder64x0::WriteNumber(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_hex_number"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "Assembler64x0: found a base 16 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; - } - case 'b': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_bin"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); - - if (kVerbose) { - kStdOut << "Assembler64x0: found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - case 'o': { - if (auto res = 
strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_octal"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); - - if (kVerbose) { - kStdOut << "Assembler64x0: found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - default: { - break; - } - } - - /* check for errno and stuff like that */ - if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { - if (errno != 0) { - return false; - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "Assembler64x0: found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Read and write an instruction to the output array. - -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerKit::Encoder64x0::WriteLine(std::string line, std::string file) { - if (CompilerKit::ast_find_needle(line, "public_segment ")) return true; - - for (auto& opcode64x0 : kOpcodes64x0) { - // strict check here - if (CompilerKit::ast_find_needle(line, opcode64x0.fName) && - CompilerKit::Detail::algorithm::is_valid_64x0(line)) { - std::string name(opcode64x0.fName); - std::string jump_label, cpy_jump_label; - - kBytes.emplace_back(opcode64x0.fOpcode); - kBytes.emplace_back(opcode64x0.fFunct3); - kBytes.emplace_back(opcode64x0.fFunct7); - - // check funct7 type. - switch (opcode64x0.fFunct7) { - // reg to reg means register to register transfer operation. 
- case kAsmRegToReg: - case kAsmImmediate: { - // \brief how many registers we found. - std::size_t found_some = 0UL; - - for (size_t line_index = 0UL; line_index < line.size(); line_index++) { - if (line[line_index] == kAsmRegisterPrefix[0] && isdigit(line[line_index + 1])) { - std::string register_syntax = kAsmRegisterPrefix; - register_syntax += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) register_syntax += line[line_index + 2]; - - std::string reg_str; - reg_str += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) reg_str += line[line_index + 2]; - - // it ranges from r0 to r19 - // something like r190 doesn't exist in the instruction set. - if (kOutputArch == CompilerKit::kPefArch64000) { - if (isdigit(line[line_index + 3]) && isdigit(line[line_index + 2])) { - reg_str += line[line_index + 3]; - CompilerKit::Detail::print_error( - "invalid register index, r" + reg_str + - "\nnote: The 64x0 accepts registers from r0 to r20.", - file); - throw std::runtime_error("invalid_register_index"); - } - } - - // finally cast to a size_t - std::size_t reg_index = strtol(reg_str.c_str(), nullptr, 10); - - if (reg_index > kAsmRegisterLimit) { - CompilerKit::Detail::print_error("invalid register index, r" + reg_str, file); - throw std::runtime_error("invalid_register_index"); - } - - kBytes.emplace_back(reg_index); - ++found_some; - - if (kVerbose) { - kStdOut << "Assembler64x0: Register found: " << register_syntax << "\n"; - kStdOut << "Assembler64x0: Register amount in instruction: " << found_some << "\n"; - } - } - } - - // we're not in immediate addressing, reg to reg. - if (opcode64x0.fFunct7 != kAsmImmediate) { - // remember! register to register! 
- if (found_some == 1) { - CompilerKit::Detail::print_error( - "Too few registers.\ntip: each Assembler64x0 register " - "starts with 'r'.\nline: " + - line, - file); - throw std::runtime_error("not_a_register"); - } - } - - if (found_some < 1 && name != "ldw" && name != "lda" && name != "stw") { - CompilerKit::Detail::print_error( - "invalid combination of opcode and registers.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_reg"); - } else if (found_some == 1 && name == "add") { - CompilerKit::Detail::print_error( - "invalid combination of opcode and registers.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_reg"); - } else if (found_some == 1 && name == "sub") { - CompilerKit::Detail::print_error( - "invalid combination of opcode and registers.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_reg"); - } - - if (found_some > 0 && name == "pop") { - CompilerKit::Detail::print_error( - "invalid combination for opcode 'pop'.\ntip: it expects " - "nothing.\nline: " + - line, - file); - throw std::runtime_error("invalid_comb_op_pop"); - } - } - default: - break; - } - - // try to fetch a number from the name - if (name == "stw" || name == "ldw" || name == "lda" || name == "sta") { - auto where_string = name; - - // if we load something, we'd need it's symbol/literal - if (name == "stw" || name == "sta" || name == "ldw" || name == "lda" || name == "sta") - where_string = ","; - - jump_label = line; - - auto found_sym = false; - - while (jump_label.find(where_string) != std::string::npos) { - jump_label = jump_label.substr(jump_label.find(where_string) + where_string.size()); - - while (jump_label.find(" ") != std::string::npos) { - jump_label.erase(jump_label.find(" "), 1); - } - - if (jump_label[0] != kAsmRegisterPrefix[0] && !isdigit(jump_label[1])) { - if (found_sym) { - CompilerKit::Detail::print_error( - "invalid combination of opcode and operands.\nhere -> " + jump_label, file); - throw 
std::runtime_error("invalid_comb_op_ops"); - } else { - // death trap installed. - found_sym = true; - } - } - } - - cpy_jump_label = jump_label; - - // replace any spaces with $ - if (jump_label[0] == ' ') { - while (jump_label.find(' ') != std::string::npos) { - if (isalnum(jump_label[0]) || isdigit(jump_label[0])) break; - - jump_label.erase(jump_label.find(' '), 1); - } - } - - if (!this->WriteNumber(0, jump_label)) { - // sta expects this: sta 0x000000, r0 - if (name == "sta") { - CompilerKit::Detail::print_error( - "invalid combination of opcode and operands.\nHere ->" + line, file); - throw std::runtime_error("invalid_comb_op_ops"); - } - } else { - if (name == "sta" && cpy_jump_label.find("extern_segment ") != std::string::npos) { - CompilerKit::Detail::print_error("invalid usage extern_segment on 'sta', here: " + line, - file); - throw std::runtime_error("invalid_sta_usage"); - } - } - - goto asm_write_label; - } - - // This is the case where we jump to a label, it is also used as a goto. 
- if (name == "lda" || name == "sta") { - asm_write_label: - if (cpy_jump_label.find('\n') != std::string::npos) - cpy_jump_label.erase(cpy_jump_label.find('\n'), 1); - - if (cpy_jump_label.find("extern_segment") != std::string::npos) { - cpy_jump_label.erase(cpy_jump_label.find("extern_segment"), strlen("extern_segment")); - - if (name == "sta") { - CompilerKit::Detail::print_error("extern_segment is not allowed on a sta operation.", - file); - throw std::runtime_error("extern_segment_sta_op"); - } else { - goto asm_end_label_cpy; - } - } - - if (name == "lda" || name == "sta") { - for (auto& label : kOriginLabel) { - if (cpy_jump_label == label.first) { - if (kVerbose) { - kStdOut << "Assembler64x0: Replace label " << cpy_jump_label - << " to address: " << label.second << std::endl; - } - - CompilerKit::NumberCast64 num(label.second); - - for (auto& num : num.number) { - kBytes.push_back(num); - } - - goto asm_end_label_cpy; - } - } - - if (cpy_jump_label[0] == '0') { - switch (cpy_jump_label[1]) { - case 'x': - case 'o': - case 'b': - if (this->WriteNumber(0, cpy_jump_label)) goto asm_end_label_cpy; - - break; - default: - break; - } - - if (isdigit(cpy_jump_label[0])) { - if (this->WriteNumber(0, cpy_jump_label)) goto asm_end_label_cpy; - - break; - } - } - } - - if (cpy_jump_label.size() < 1) { - CompilerKit::Detail::print_error("label is empty, can't jump on it.", file); - throw std::runtime_error("label_empty"); - } - - /// don't go any further if: - /// load word (ldw) or store word. 
(stw) - - if (name == "ldw" || name == "stw") break; - - auto mld_reloc_str = std::to_string(cpy_jump_label.size()); - mld_reloc_str += kUndefinedSymbol; - mld_reloc_str += cpy_jump_label; - - bool ignore_back_slash = false; - - for (auto& reloc_chr : mld_reloc_str) { - if (reloc_chr == '\\') { - ignore_back_slash = true; - continue; - } - - if (ignore_back_slash) { - ignore_back_slash = false; - continue; - } - - kBytes.push_back(reloc_chr); - } - - kBytes.push_back('\0'); - goto asm_end_label_cpy; - } - - asm_end_label_cpy: - kOrigin += k64x0IPAlignment; - - break; - } - } - - return true; -} - -// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+64x0.cpp b/src/CompilerKit/src/Assemblers/Assembler+64x0.cpp new file mode 100644 index 0000000..bbb8447 --- /dev/null +++ b/src/CompilerKit/src/Assemblers/Assembler+64x0.cpp @@ -0,0 +1,875 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// bugs: 0 + +///////////////////////////////////////////////////////////////////////////////////////// + +// @file Assembler+64x0.cc +// @author Amlal El Mahrouss +// @brief 64x000 Assembler. + +// REMINDER: when dealing with an undefined symbol use (string +// size):LinkerFindSymbol:(string) so that ld will look for it. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +#ifndef __ASM_NEED_64x0__ +#define __ASM_NEED_64x0__ +#endif + +#include +#include +#include +#include +#include + +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +static char kOutputArch = CompilerKit::kPefArch64000; + +constexpr auto k64x0IPAlignment = 0x1U; + +static std::size_t kCounter = 1UL; + +static std::uintptr_t kOrigin = kPefBaseOrigin; +static std::vector> kOriginLabel; + +static std::vector kBytes; + +static CompilerKit::AERecordHeader kCurrentRecord{ + .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; + +static std::vector kRecords; +static std::vector kUndefinedSymbols; + +static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; +static const std::string kRelocSymbol = ":RuntimeSymbol:"; + +// \brief forward decl. +static bool asm_read_attributes(std::string line); + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief 64x0 assembler entrypoint, the program/module starts here. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(AssemblerMain64x0) { + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + for (size_t i = 1; i < argc; ++i) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "--version") == 0 || strcmp(argv[i], "--v") == 0) { + kStdOut + << "Assembler64x0: 64x0 Assembler.\nAssembler64x0: v1.10\nAssembler64x0: Copyright (c) " + "Amlal El Mahrouss\n"; + return 0; + } else if (strcmp(argv[i], "--help") == 0) { + kStdOut << "Assembler64x0: 64x0 Assembler.\nAssembler64x0: Copyright (c) 2024 Mahrouss " + "Logic.\n"; + kStdOut << "--version: Print program version.\n"; + kStdOut << "--fverbose: Print verbose output.\n"; + kStdOut << "--fbinary: Output as flat binary.\n"; + kStdOut << "--64xxx: Compile for a subset of the X64000.\n"; + + return 0; + } else if (strcmp(argv[i], "--fbinary") == 0) { + kOutputAsBinary = true; + continue; + } else if (strcmp(argv[i], "--fverbose") == 0) { + kVerbose = true; + continue; + } + + kStdOut << "Assembler64x0: ignore " << argv[i] << "\n"; + continue; + } + + if (!std::filesystem::exists(argv[i])) { + kStdOut << "Assembler64x0: can't open: " << argv[i] << std::endl; + goto asm_fail_exit; + } + + std::string object_output(argv[i]); + + for (auto& ext : kAsmFileExts) { + if (object_output.find(ext) != std::string::npos) { + object_output.erase(object_output.find(ext), std::strlen(ext)); + } + } + + object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; + + std::ifstream file_ptr(argv[i]); + std::ofstream file_ptr_out(object_output, std::ofstream::binary); + + if (file_ptr_out.bad()) { + if (kVerbose) { + kStdOut << "Assembler64x0: error: " << strerror(errno) << "\n"; + } + } + + std::string line; + + CompilerKit::AEHeader hdr{0}; + + memset(hdr.fPad, kAENullType, kAEPad); + + hdr.fMagic[0] = kAEMag0; + hdr.fMagic[1] = kAEMag1; + hdr.fMagic[2] = kAEMag2; + hdr.fSize = sizeof(CompilerKit::AEHeader); + hdr.fArch = kOutputArch; + + ///////////////////////////////////////////////////////////////////////////////////////// + + // COMPILATION LOOP + + ///////////////////////////////////////////////////////////////////////////////////////// + + CompilerKit::Encoder64x0 asm64; + + while (std::getline(file_ptr, line)) { + if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { + CompilerKit::Detail::print_error(ln, argv[i]); + continue; + } + + try { + asm_read_attributes(line); + asm64.WriteLine(line, argv[i]); + } catch (const std::exception& e) { + if (kVerbose) { + std::string what = e.what(); + CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); + } + + std::filesystem::remove(object_output); + goto asm_fail_exit; + } + } + + if (!kOutputAsBinary) { + if (kVerbose) { + kStdOut << "Assembler64x0: Writing object file...\n"; + } + + // this is the final step, write everything to the file. 
+ + auto pos = file_ptr_out.tellp(); + + hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); + + file_ptr_out << hdr; + + if (kRecords.empty()) { + kStdErr << "Assembler64x0: At least one record is needed to write an object " + "file.\nAssembler64x0: Make one using `public_segment .code64 foo_bar`.\n"; + + std::filesystem::remove(object_output); + return 1; + } + + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + std::size_t record_count = 0UL; + + for (auto& rec : kRecords) { + if (kVerbose) kStdOut << "Assembler64x0: Wrote record " << rec.fName << " to file...\n"; + + rec.fFlags |= CompilerKit::kKindRelocationAtRuntime; + rec.fOffset = record_count; + ++record_count; + + file_ptr_out << rec; + } + + // increment once again, so that we won't lie about the kUndefinedSymbols. + ++record_count; + + for (auto& sym : kUndefinedSymbols) { + CompilerKit::AERecordHeader _record_hdr{0}; + + if (kVerbose) kStdOut << "Assembler64x0: Wrote symbol " << sym << " to file...\n"; + + _record_hdr.fKind = kAENullType; + _record_hdr.fSize = sym.size(); + _record_hdr.fOffset = record_count; + + ++record_count; + + memset(_record_hdr.fPad, kAENullType, kAEPad); + memcpy(_record_hdr.fName, sym.c_str(), sym.size()); + + file_ptr_out << _record_hdr; + + ++kCounter; + } + + auto pos_end = file_ptr_out.tellp(); + + file_ptr_out.seekp(pos); + + hdr.fStartCode = pos_end; + hdr.fCodeSize = kBytes.size(); + + file_ptr_out << hdr; + + file_ptr_out.seekp(pos_end); + } else { + if (kVerbose) { + kStdOut << "Assembler64x0: Write raw binary...\n"; + } + } + + // byte from byte, we write this. 
+ for (auto& byte : kBytes) { + file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); + } + + if (kVerbose) kStdOut << "Assembler64x0: Wrote file with program in it.\n"; + + file_ptr_out.flush(); + file_ptr_out.close(); + + if (kVerbose) kStdOut << "Assembler64x0: Exit succeeded.\n"; + + return 0; + } + +asm_fail_exit: + + if (kVerbose) kStdOut << "Assembler64x0: Exit failed.\n"; + + return 1; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for attributes +// returns true if any was found. + +///////////////////////////////////////////////////////////////////////////////////////// + +static bool asm_read_attributes(std::string line) { + // extern_segment is the opposite of public_segment, it signals to the ld + // that we need this symbol. + if (CompilerKit::ast_find_needle(line, "extern_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_extern_segment_bin"); + } + + auto name = line.substr(line.find("extern_segment") + strlen("extern_segment")); + + /// sanity check to avoid stupid linker errors. + if (name.size() == 0) { + CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); + throw std::runtime_error("invalid_extern_segment"); + } + + std::string result = std::to_string(name.size()); + result += kUndefinedSymbol; + + // mangle this + for (char& j : name) { + if (j == ' ' || j == ',') j = '$'; + } + + result += name; + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. + kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. 
+ kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that ld can find it. + + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, result.c_str(), result.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + // public_segment is a special keyword used by Assembler64x0 to tell the AE output stage to + // mark this section as a header. it currently supports .code64, .data64., + // .zero64 + else if (CompilerKit::ast_find_needle(line, "public_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_public_segment_bin"); + } + + auto name = line.substr(line.find("public_segment") + strlen("public_segment")); + + std::string name_copy = name; + + for (char& j : name) { + if (j == ' ') j = '$'; + } + + if (name.find(".code64") != std::string::npos) { + // data is treated as code. + + name_copy.erase(name_copy.find(".code64"), strlen(".code64")); + kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(".data64") != std::string::npos) { + // no code will be executed from here. + + name_copy.erase(name_copy.find(".data64"), strlen(".data64")); + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(".zero64") != std::string::npos) { + // this is a bss section. + + name_copy.erase(name_copy.find(".zero64"), strlen(".zero64")); + kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that ld can find it. 
+ + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); + + kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); + ++kOrigin; + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, name.c_str(), name.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + return false; +} + +// \brief algorithms and helpers. + +namespace CompilerKit::Detail::algorithm { +// \brief authorize a brief set of characters. +static inline bool is_not_alnum_space(char c) { + return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || + (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || + (c == '_') || (c == ':') || (c == '@') || (c == '.')); +} + +bool is_valid_64x0(std::string str) { + return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); +} +} // namespace CompilerKit::Detail::algorithm + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for line (syntax check) + +///////////////////////////////////////////////////////////////////////////////////////// + +std::string CompilerKit::Encoder64x0::CheckLine(std::string line, std::string file) { + std::string err_str; + + if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || + CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || + CompilerKit::ast_find_needle(line, ";")) { + if (line.find('#') != std::string::npos) { + line.erase(line.find('#')); + } else if (line.find(';') != std::string::npos) { + line.erase(line.find(';')); + } else { + // now 
check the line for validity + if (!CompilerKit::Detail::algorithm::is_valid_64x0(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + } + } + + return err_str; + } + + if (!CompilerKit::Detail::algorithm::is_valid_64x0(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + + return err_str; + } + + // check for a valid instruction format. + + if (line.find(',') != std::string::npos) { + if (line.find(',') + 1 == line.size()) { + err_str += "\nInstruction lacks right register, here -> "; + err_str += line.substr(line.find(',')); + + return err_str; + } else { + bool nothing_on_right = true; + + if (line.find(',') + 1 > line.size()) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + + auto substr = line.substr(line.find(',') + 1); + + for (auto& ch : substr) { + if (ch != ' ' && ch != '\t') { + nothing_on_right = false; + } + } + + // this means we found nothing after that ',' . + if (nothing_on_right) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + } + } + + // these do take an argument. + std::vector operands_inst = {"stw", "ldw", "lda", "sta"}; + + // these don't. + std::vector filter_inst = {"jlr", "jrl", "int"}; + + for (auto& opcode64x0 : kOpcodes64x0) { + if (line.find(opcode64x0.fName) != std::string::npos) { + if (opcode64x0.fFunct7 == kAsmNoArgs) return err_str; + + for (auto& op : operands_inst) { + // if only the instruction was found. 
+ if (line == op) { + err_str += "\nMalformed "; + err_str += op; + err_str += " instruction, here -> "; + err_str += line; + } + } + + // if it is like that -> addr1, 0x0 + if (auto it = std::find(filter_inst.begin(), filter_inst.end(), opcode64x0.fName); + it == filter_inst.cend()) { + if (CompilerKit::ast_find_needle(line, opcode64x0.fName)) { + if (!isspace(line[line.find(opcode64x0.fName) + strlen(opcode64x0.fName)])) { + err_str += "\nMissing space between "; + err_str += opcode64x0.fName; + err_str += " and operands.\nhere -> "; + err_str += line; + } + } + } + + return err_str; + } + } + + err_str += "Unrecognized instruction: " + line; + + return err_str; +} + +bool CompilerKit::Encoder64x0::WriteNumber(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_hex_number"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "Assembler64x0: found a base 16 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; + } + case 'b': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_bin"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); + + if (kVerbose) { + kStdOut << "Assembler64x0: found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + case 'o': { + if (auto res = 
strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_octal"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); + + if (kVerbose) { + kStdOut << "Assembler64x0: found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + default: { + break; + } + } + + /* check for errno and stuff like that */ + if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { + if (errno != 0) { + return false; + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "Assembler64x0: found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Read and write an instruction to the output array. + +///////////////////////////////////////////////////////////////////////////////////////// + +bool CompilerKit::Encoder64x0::WriteLine(std::string line, std::string file) { + if (CompilerKit::ast_find_needle(line, "public_segment ")) return true; + + for (auto& opcode64x0 : kOpcodes64x0) { + // strict check here + if (CompilerKit::ast_find_needle(line, opcode64x0.fName) && + CompilerKit::Detail::algorithm::is_valid_64x0(line)) { + std::string name(opcode64x0.fName); + std::string jump_label, cpy_jump_label; + + kBytes.emplace_back(opcode64x0.fOpcode); + kBytes.emplace_back(opcode64x0.fFunct3); + kBytes.emplace_back(opcode64x0.fFunct7); + + // check funct7 type. + switch (opcode64x0.fFunct7) { + // reg to reg means register to register transfer operation. 
+ case kAsmRegToReg: + case kAsmImmediate: { + // \brief how many registers we found. + std::size_t found_some = 0UL; + + for (size_t line_index = 0UL; line_index < line.size(); line_index++) { + if (line[line_index] == kAsmRegisterPrefix[0] && isdigit(line[line_index + 1])) { + std::string register_syntax = kAsmRegisterPrefix; + register_syntax += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) register_syntax += line[line_index + 2]; + + std::string reg_str; + reg_str += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) reg_str += line[line_index + 2]; + + // it ranges from r0 to r19 + // something like r190 doesn't exist in the instruction set. + if (kOutputArch == CompilerKit::kPefArch64000) { + if (isdigit(line[line_index + 3]) && isdigit(line[line_index + 2])) { + reg_str += line[line_index + 3]; + CompilerKit::Detail::print_error( + "invalid register index, r" + reg_str + + "\nnote: The 64x0 accepts registers from r0 to r20.", + file); + throw std::runtime_error("invalid_register_index"); + } + } + + // finally cast to a size_t + std::size_t reg_index = strtol(reg_str.c_str(), nullptr, 10); + + if (reg_index > kAsmRegisterLimit) { + CompilerKit::Detail::print_error("invalid register index, r" + reg_str, file); + throw std::runtime_error("invalid_register_index"); + } + + kBytes.emplace_back(reg_index); + ++found_some; + + if (kVerbose) { + kStdOut << "Assembler64x0: Register found: " << register_syntax << "\n"; + kStdOut << "Assembler64x0: Register amount in instruction: " << found_some << "\n"; + } + } + } + + // we're not in immediate addressing, reg to reg. + if (opcode64x0.fFunct7 != kAsmImmediate) { + // remember! register to register! 
+ if (found_some == 1) { + CompilerKit::Detail::print_error( + "Too few registers.\ntip: each Assembler64x0 register " + "starts with 'r'.\nline: " + + line, + file); + throw std::runtime_error("not_a_register"); + } + } + + if (found_some < 1 && name != "ldw" && name != "lda" && name != "stw") { + CompilerKit::Detail::print_error( + "invalid combination of opcode and registers.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_reg"); + } else if (found_some == 1 && name == "add") { + CompilerKit::Detail::print_error( + "invalid combination of opcode and registers.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_reg"); + } else if (found_some == 1 && name == "sub") { + CompilerKit::Detail::print_error( + "invalid combination of opcode and registers.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_reg"); + } + + if (found_some > 0 && name == "pop") { + CompilerKit::Detail::print_error( + "invalid combination for opcode 'pop'.\ntip: it expects " + "nothing.\nline: " + + line, + file); + throw std::runtime_error("invalid_comb_op_pop"); + } + } + default: + break; + } + + // try to fetch a number from the name + if (name == "stw" || name == "ldw" || name == "lda" || name == "sta") { + auto where_string = name; + + // if we load something, we'd need it's symbol/literal + if (name == "stw" || name == "sta" || name == "ldw" || name == "lda" || name == "sta") + where_string = ","; + + jump_label = line; + + auto found_sym = false; + + while (jump_label.find(where_string) != std::string::npos) { + jump_label = jump_label.substr(jump_label.find(where_string) + where_string.size()); + + while (jump_label.find(" ") != std::string::npos) { + jump_label.erase(jump_label.find(" "), 1); + } + + if (jump_label[0] != kAsmRegisterPrefix[0] && !isdigit(jump_label[1])) { + if (found_sym) { + CompilerKit::Detail::print_error( + "invalid combination of opcode and operands.\nhere -> " + jump_label, file); + throw 
std::runtime_error("invalid_comb_op_ops"); + } else { + // death trap installed. + found_sym = true; + } + } + } + + cpy_jump_label = jump_label; + + // replace any spaces with $ + if (jump_label[0] == ' ') { + while (jump_label.find(' ') != std::string::npos) { + if (isalnum(jump_label[0]) || isdigit(jump_label[0])) break; + + jump_label.erase(jump_label.find(' '), 1); + } + } + + if (!this->WriteNumber(0, jump_label)) { + // sta expects this: sta 0x000000, r0 + if (name == "sta") { + CompilerKit::Detail::print_error( + "invalid combination of opcode and operands.\nHere ->" + line, file); + throw std::runtime_error("invalid_comb_op_ops"); + } + } else { + if (name == "sta" && cpy_jump_label.find("extern_segment ") != std::string::npos) { + CompilerKit::Detail::print_error("invalid usage extern_segment on 'sta', here: " + line, + file); + throw std::runtime_error("invalid_sta_usage"); + } + } + + goto asm_write_label; + } + + // This is the case where we jump to a label, it is also used as a goto. 
+ if (name == "lda" || name == "sta") { + asm_write_label: + if (cpy_jump_label.find('\n') != std::string::npos) + cpy_jump_label.erase(cpy_jump_label.find('\n'), 1); + + if (cpy_jump_label.find("extern_segment") != std::string::npos) { + cpy_jump_label.erase(cpy_jump_label.find("extern_segment"), strlen("extern_segment")); + + if (name == "sta") { + CompilerKit::Detail::print_error("extern_segment is not allowed on a sta operation.", + file); + throw std::runtime_error("extern_segment_sta_op"); + } else { + goto asm_end_label_cpy; + } + } + + if (name == "lda" || name == "sta") { + for (auto& label : kOriginLabel) { + if (cpy_jump_label == label.first) { + if (kVerbose) { + kStdOut << "Assembler64x0: Replace label " << cpy_jump_label + << " to address: " << label.second << std::endl; + } + + CompilerKit::NumberCast64 num(label.second); + + for (auto& num : num.number) { + kBytes.push_back(num); + } + + goto asm_end_label_cpy; + } + } + + if (cpy_jump_label[0] == '0') { + switch (cpy_jump_label[1]) { + case 'x': + case 'o': + case 'b': + if (this->WriteNumber(0, cpy_jump_label)) goto asm_end_label_cpy; + + break; + default: + break; + } + + if (isdigit(cpy_jump_label[0])) { + if (this->WriteNumber(0, cpy_jump_label)) goto asm_end_label_cpy; + + break; + } + } + } + + if (cpy_jump_label.size() < 1) { + CompilerKit::Detail::print_error("label is empty, can't jump on it.", file); + throw std::runtime_error("label_empty"); + } + + /// don't go any further if: + /// load word (ldw) or store word. 
(stw) + + if (name == "ldw" || name == "stw") break; + + auto mld_reloc_str = std::to_string(cpy_jump_label.size()); + mld_reloc_str += kUndefinedSymbol; + mld_reloc_str += cpy_jump_label; + + bool ignore_back_slash = false; + + for (auto& reloc_chr : mld_reloc_str) { + if (reloc_chr == '\\') { + ignore_back_slash = true; + continue; + } + + if (ignore_back_slash) { + ignore_back_slash = false; + continue; + } + + kBytes.push_back(reloc_chr); + } + + kBytes.push_back('\0'); + goto asm_end_label_cpy; + } + + asm_end_label_cpy: + kOrigin += k64x0IPAlignment; + + break; + } + } + + return true; +} + +// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+AMD64.cc b/src/CompilerKit/src/Assemblers/Assembler+AMD64.cc deleted file mode 100644 index fcd8c3e..0000000 --- a/src/CompilerKit/src/Assemblers/Assembler+AMD64.cc +++ /dev/null @@ -1,1848 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @file Assembler+AMD64.cc -/// @author Amlal El Mahrouss -/// @brief AMD64 Assembler. -/// REMINDER: when dealing with an undefined symbol use (string -/// size):LinkerFindSymbol:(string) so that ld will look for it. - -///////////////////////////////////////////////////////////////////////////////////////// - -/// BUG: 0 - -/// Feature request: 1 -/// Encode registers in mov, add, xor... 
- -///////////////////////////////////////////////////////////////////////////////////////// - -#ifndef __ASM_NEED_AMD64__ -#define __ASM_NEED_AMD64__ -#endif - -#define kAssemblerPragmaSymStr "%" -#define kAssemblerPragmaSym '%' - -#include -#include -#include -#include -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" -#define kYellow "\e[0;33m" - -static char kOutputArch = CompilerKit::kPefArchAMD64; - -static constexpr auto kIPAlignement = 0x1U; -static auto kCounter = 0x1UL; - -static std::uintptr_t kOrigin = kPefBaseOrigin; -static std::vector> kOriginLabel; - -/// @brief keep it simple by default. -static std::int32_t kRegisterBitWidth = 16U; - -static std::vector kAppBytes; - -static CompilerKit::AERecordHeader kCurrentRecord{ - .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; - -static std::vector kRecords; -static std::vector kDefinedSymbols; -static std::vector kUndefinedSymbols; - -static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; - -// \brief forward decl. -static bool asm_read_attributes(std::string line); - -#include - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief AMD64 assembler entrypoint, the program/module starts here. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(AssemblerMainAMD64) { - //////////////// CPU OPCODES BEGIN //////////////// - - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - std::string opcodes_jump[kJumpLimit] = {"ja", "jae", "jb", "jbe", "jc", "je", "jg", "jge", - "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", - "jng", "jnge", "jnl", "jnle", "jno", "jnp", "jns", "jnz", - "jo", "jp", "jpe", "jpo", "js", "jz"}; - - for (i64_hword_t i = 0; i < kJumpLimit; i++) { - CpuOpcodeAMD64 code{.fName = opcodes_jump[i], - .fOpcode = static_cast(kAsmJumpOpcode + i)}; - kOpcodesAMD64.push_back(code); - } - - CpuOpcodeAMD64 code{.fName = "jcxz", .fOpcode = 0xE3}; - kOpcodesAMD64.push_back(code); - - for (i64_hword_t i = kJumpLimitStandard; i < kJumpLimitStandardLimit; i++) { - CpuOpcodeAMD64 code{.fName = "jmp", .fOpcode = i}; - kOpcodesAMD64.push_back(code); - } - - CpuOpcodeAMD64 lahf{.fName = "lahf", .fOpcode = 0x9F}; - kOpcodesAMD64.push_back(lahf); - - CpuOpcodeAMD64 lds{.fName = "lds", .fOpcode = 0xC5}; - kOpcodesAMD64.push_back(lds); - - CpuOpcodeAMD64 lea{.fName = "lea", .fOpcode = 0x8D}; - kOpcodesAMD64.push_back(lea); - - CpuOpcodeAMD64 nop{.fName = "nop", .fOpcode = 0x90}; - kOpcodesAMD64.push_back(nop); - - //////////////// CPU OPCODES END //////////////// - - for (size_t i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { - kStdOut - << "AssemblerAMD64: AMD64 Assembler Driver.\nAssemblerAMD64: Copyright (c) 2024-2026 " - "Amlal El Mahrouss\n"; - kStdOut - << "AssemblerAMD64: This Software is part of the NeKernel project. 
(nekernel.org)\n"; - return 0; - } else if (strcmp(argv[i], "-help") == 0) { - kStdOut - << "AssemblerAMD64: AMD64 Assembler Driver.\nAssemblerAMD64: Copyright (c) 2024-2026 " - "Amlal El Mahrouss\n"; - kStdOut - << "AssemblerAMD64: This Software is part of the NeKernel project. (nekernel.org)\n"; - kStdOut << "-version: Print program version.\n"; - kStdOut << "-fverbose: Print verbose output.\n"; - kStdOut << "-fbinary: Output as flat binary.\n"; - - return 0; - } else if (strcmp(argv[i], "-fbinary") == 0) { - kOutputAsBinary = true; - continue; - } else if (strcmp(argv[i], "-fverbose") == 0) { - kVerbose = true; - continue; - } - - kStdOut << "AssemblerAMD64: ignore " << argv[i] << "\n"; - continue; - } - - if (!std::filesystem::exists(argv[i])) { - kStdOut << "AssemblerAMD64: can't open: " << argv[i] << std::endl; - goto asm_fail_exit; - } - - std::string object_output(argv[i]); - std::string asm_input(argv[i]); - - for (auto& ext : kAsmFileExts) { - if (object_output.ends_with(ext)) { - object_output.erase(object_output.find(ext), std::strlen(ext)); - break; - } - } - - object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; - - std::ifstream file_ptr(argv[i]); - std::ofstream file_ptr_out(object_output, std::ofstream::binary); - - kStdOut << "AssemblerAMD64: Assembling: " << argv[i] << "\n"; - - if (file_ptr_out.bad()) { - if (kVerbose) { - kStdOut << "AssemblerAMD64: error: " << strerror(errno) << "\n"; - } - - return 1; - } - - std::string line; - - CompilerKit::AEHeader hdr{0}; - - memset(hdr.fPad, kAENullType, kAEPad); - - hdr.fMagic[0] = kAEMag0; - hdr.fMagic[1] = kAEMag1; - hdr.fMagic[2] = kAEMag2; - hdr.fSize = sizeof(CompilerKit::AEHeader); - hdr.fArch = kOutputArch; - - ///////////////////////////////////////////////////////////////////////////////////////// - - // COMPILATION LOOP - - ///////////////////////////////////////////////////////////////////////////////////////// - - CompilerKit::EncoderAMD64 asm64; - - if (kVerbose) { - kStdOut << "Compiling: " + asm_input << "\n"; - } - - while (std::getline(file_ptr, line)) { - try { - if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { - CompilerKit::Detail::print_error(ln, argv[i]); - continue; - } - - asm_read_attributes(line); - asm64.WriteLine(line, argv[i]); - } catch (const std::exception& e) { - if (kVerbose) { - std::string what = e.what(); - CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); - } - - try { - std::filesystem::remove(object_output); - } catch (...) { - } - - goto asm_fail_exit; - } - } - - if (!kOutputAsBinary) { - if (kVerbose) { - kStdOut << "AssemblerAMD64: Writing object file...\n"; - } - - // this is the final step, write everything to the file. 
- - auto pos = file_ptr_out.tellp(); - - hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); - - file_ptr_out << hdr; - - if (kRecords.empty()) { - kStdErr << "AssemblerAMD64: At least one record is needed to write an object " - "file.\nAssemblerAMD64: Make one using `public_segment .code64 foo_bar`.\n"; - - std::filesystem::remove(object_output); - return 1; - } - - kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); - - std::size_t record_count = 0UL; - - for (auto& rec : kRecords) { - if (kVerbose) kStdOut << "AssemblerAMD64: Wrote record " << rec.fName << " to file...\n"; - - rec.fFlags |= CompilerKit::kKindRelocationAtRuntime; - rec.fOffset = record_count; - ++record_count; - - file_ptr_out << rec; - } - - // increment once again, so that we won't lie about the kUndefinedSymbols. - ++record_count; - - for (auto& sym : kUndefinedSymbols) { - CompilerKit::AERecordHeader undefined_sym{0}; - - if (kVerbose) kStdOut << "AssemblerAMD64: Wrote symbol " << sym << " to file...\n"; - - undefined_sym.fKind = CompilerKit::kKindRelocationAtRuntime; - undefined_sym.fSize = sym.size(); - undefined_sym.fOffset = record_count; - - ++record_count; - - memset(undefined_sym.fPad, kAENullType, kAEPad); - memcpy(undefined_sym.fName, sym.c_str(), sym.size()); - - file_ptr_out << undefined_sym; - - ++kCounter; - } - - auto pos_end = file_ptr_out.tellp(); - - file_ptr_out.seekp(pos); - - hdr.fStartCode = pos_end; - hdr.fCodeSize = kAppBytes.size(); - - file_ptr_out << hdr; - - file_ptr_out.seekp(pos_end); - } else { - if (kVerbose) { - kStdOut << "AssemblerAMD64: Write raw binary...\n"; - } - } - - // byte from byte, we write this. 
- for (auto& byte : kAppBytes) { - file_ptr_out << reinterpret_cast(&byte)[0]; - } - - if (kVerbose) kStdOut << "AssemblerAMD64: Wrote file with program in it.\n"; - - file_ptr_out.flush(); - file_ptr_out.close(); - - if (kVerbose) kStdOut << "AssemblerAMD64: Exit succeeded.\n"; - - return 0; - } - -asm_fail_exit: - - if (kVerbose) kStdOut << "AssemblerAMD64: Exit failed.\n"; - - return 1; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for attributes -// returns true if any was found. - -///////////////////////////////////////////////////////////////////////////////////////// - -static bool asm_read_attributes(std::string line) { - // extern_segment is the opposite of public_segment, it signals to the ld - // that we need this symbol. - if (CompilerKit::ast_find_needle(line, "extern_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid directive in flat binary mode.", "CompilerKit"); - throw std::runtime_error("invalid_extern_segment_bin"); - } - - auto pos = line.find("extern_segment"); - auto name_pos = pos + strlen("extern_segment") + 1; - - if (pos == std::string::npos || name_pos >= line.size()) { - CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); - throw std::runtime_error("invalid_extern_segment"); - } - - auto name = line.substr(name_pos); - - if (name.size() == 0) { - CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); - throw std::runtime_error("invalid_extern_segment"); - } - - kUndefinedSymbols.push_back(name); - - std::string result = std::to_string(name.size()); - result += kUndefinedSymbol; - - // mangle this - for (char& j : name) { - if (j == ' ' || j == ',') j = '$'; - } - - result += name; - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. 
- kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. - kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, result.c_str(), result.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - // public_segment is a special keyword used by AssemblerAMD64 to tell the AE output stage to - // mark this section as a header. it currently supports .code64, .data64 and - // .zero64. - else if (CompilerKit::ast_find_needle(line, "public_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid directive in flat binary mode.", "CompilerKit"); - throw std::runtime_error("invalid_public_segment_bin"); - } - - auto res_sym_at = (line.find("public_segment") + strlen("public_segment") + 1); - if (res_sym_at > line.size()) { - CompilerKit::Detail::print_error("Invalid symbol for public_segment.", "CompilerKit"); - throw std::runtime_error("invalid_public_segment_symbol"); - } - - auto name = line.substr(res_sym_at); - - std::string name_copy = name; - - for (char& j : name) { - if (j == ' ') j = '$'; - } - - if (std::find(kDefinedSymbols.begin(), kDefinedSymbols.end(), name) != kDefinedSymbols.end()) { - CompilerKit::Detail::print_error("Symbol already defined.", "CompilerKit"); - throw std::runtime_error("invalid_public_segment_bin"); - } - - kDefinedSymbols.push_back(name); - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. 
- kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. - kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); - - kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); - ++kOrigin; - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, name.c_str(), name.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - return false; -} - -// \brief algorithms and helpers. - -namespace CompilerKit::Detail::Algorithm { - -// \brief authorize a brief set of characters. 
-static inline bool is_not_valid(char c) { - if ((isalpha(c) || isdigit(c)) || - ((c == ' ') || (c == '\t') || (c == ',') || (c == '(') || (c == ')') || (c == '"') || - (c == '*') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || (c == '_') || - (c == ':') || (c == '@') || (c == '.') || (c == '#') || (c == '%') || (c == '~') || - (c == ';'))) - return false; - - return true; -} - -bool is_valid_amd64(std::string str) { - return std::find_if(str.begin(), str.end(), is_not_valid) == str.end(); -} - -} // namespace CompilerKit::Detail::Algorithm - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for line (syntax check) - -///////////////////////////////////////////////////////////////////////////////////////// - -std::string CompilerKit::EncoderAMD64::CheckLine(std::string line, std::string file) { - std::string err_str; - - if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || - CompilerKit::ast_find_needle(line, "public_segment") || - CompilerKit::ast_find_needle(line, kAssemblerPragmaSymStr) || - CompilerKit::ast_find_needle(line, ";") || line[0] == kAssemblerPragmaSym) { - if (line.find(';') != std::string::npos) { - line.erase(line.find(';')); - } else { - // now check the line for validity - if (!CompilerKit::Detail::Algorithm::is_valid_amd64(line)) { - err_str = "Line contains non valid characters.\nhere -> "; - err_str += line; - } - } - - return err_str; - } - - // check for a valid instruction format. 
- - if (line.find(',') != std::string::npos) { - if (line.find(',') + 1 == line.size()) { - err_str += "\nInstruction lacks right register, here -> "; - err_str += line.substr(line.find(',')); - - return err_str; - } else { - bool nothing_on_right = true; - - if (line.find(',') + 1 > line.size()) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - - auto substr = line.substr(line.find(',') + 1); - - for (auto& ch : substr) { - if (ch != ' ' && ch != '\t') { - nothing_on_right = false; - } - } - - // this means we found nothing after that ',' . - if (nothing_on_right) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - } - } - for (auto& opcodeAMD64 : kOpcodesAMD64) { - if (CompilerKit::ast_find_needle(line, opcodeAMD64.fName)) { - return err_str; - } - } - - err_str += "\nUnrecognized instruction -> " + line; - - return err_str; -} - -/// @brief Read and write a number to the output array. -bool CompilerKit::EncoderAMD64::WriteNumber(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) - << "\n"; - } - - return true; - } - case 'b': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - } - case 'o': - case '0': { 
- auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - } - default: { - break; - } - } - - /// @note We assume base 10 here. As other cases have failed. - auto res = strtol(jump_label.substr(pos + 1).c_str(), nullptr, 10); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos + 1) - << "\n"; - } - - return true; -} - -bool CompilerKit::EncoderAMD64::WriteNumber32(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) - << "\n"; - } - - return true; - } - case 'b': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - } - case 'o': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast32 
num = CompilerKit::NumberCast32(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - } - default: { - break; - } - } - - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 10); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -bool CompilerKit::EncoderAMD64::WriteNumber16(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("Invalid hex number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_hex"); - } - } - - CompilerKit::NumberCast16 num = - CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) - << "\n"; - } - - return true; - } - case 'b': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("Invalid binary number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_bin"); - } - } - - CompilerKit::NumberCast16 num = - CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - 
} - case 'o': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("Invalid octal number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_octal"); - } - } - - CompilerKit::NumberCast16 num = - CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8)); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - return true; - } - default: { - break; - } - } - - /* check for errno and stuff like that */ - if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { - if (errno != 0) { - return false; - } - } - - CompilerKit::NumberCast16 num = - CompilerKit::NumberCast16(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); - - for (char& i : num.number) { - kAppBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -bool CompilerKit::EncoderAMD64::WriteNumber8(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); - - kAppBytes.push_back(num.number); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) - << "\n"; - } - - return true; - } - case 'b': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - 
kAppBytes.push_back(num.number); - - return true; - } - case 'o': { - auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - kAppBytes.push_back(num.number); - - return true; - } - default: { - break; - } - } - - auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); - - if (errno != 0) { - return false; - } - - CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); - - kAppBytes.push_back(num.number); - - if (kVerbose) { - kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Read and write an instruction to the output array. - -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerKit::EncoderAMD64::WriteLine(std::string line, std::string file) { - if (CompilerKit::ast_find_needle(line, "public_segment ")) return true; - - struct RegMapAMD64 { - CompilerKit::STLString fName; - i64_byte_t fModRM; - }; - - std::vector kRegisterList{ - {.fName = "ax", .fModRM = 0x0}, {.fName = "cx", .fModRM = 1}, - {.fName = "dx", .fModRM = 0x2}, {.fName = "bx", .fModRM = 3}, - {.fName = "sp", .fModRM = 0x4}, {.fName = "bp", .fModRM = 5}, - {.fName = "si", .fModRM = 0x6}, {.fName = "di", .fModRM = 7}, - }; - - bool foundInstruction = false; - - for (auto& opcodeAMD64 : kOpcodesAMD64) { - // strict check here - if (CompilerKit::ast_find_needle(line, opcodeAMD64.fName) && - CompilerKit::Detail::Algorithm::is_valid_amd64(line)) { - foundInstruction = true; - std::string name(opcodeAMD64.fName); - - /// Move instruction handler. 
- if (line.find(name) != std::string::npos) { - if (name == "mov" || name == "xor") { - std::string substr = line.substr(line.find(name) + name.size()); - - uint64_t bits = kRegisterBitWidth; - - if (substr.find(",") == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: missing right operand.", "CompilerKit"); - throw std::runtime_error("syntax_err"); - } - - /// Handle [reg+n] or [reg-n] memory addressing for any register - if (substr.find('[') != std::string::npos) { - // Parse the memory operand - auto bracketStart = substr.find('['); - auto bracketEnd = substr.find(']'); - - if (bracketStart == std::string::npos || bracketEnd == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: malformed memory operand.", file); - throw std::runtime_error("syntax_err"); - } - - std::string memOperand = substr.substr(bracketStart + 1, bracketEnd - bracketStart - 1); - - // Register lookup table - struct RegInfo { - const char* name; - i64_byte_t code; - }; - - RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, - {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; - - // Find base register in memory operand - i64_byte_t baseReg = 0; - bool foundBase = false; - - for (auto& reg : regs64) { - if (memOperand.find(reg.name) != std::string::npos) { - baseReg = reg.code; - foundBase = true; - break; - } - } - - if (!foundBase) { - CompilerKit::Detail::print_error("Invalid base register in memory operand.", file); - throw std::runtime_error("invalid_base_reg"); - } - - bool isRbp = (baseReg == 5); - bool isRsp = (baseReg == 4); - - // Parse displacement - int32_t displacement = 0; - bool hasDisp = false; - - auto plusPos = memOperand.find('+'); - auto minusPos = memOperand.find('-'); - - if (plusPos != std::string::npos) { - std::string dispStr = memOperand.substr(plusPos + 1); - displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } else if (minusPos != std::string::npos) { - std::string dispStr = 
memOperand.substr(minusPos + 1); - displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } - - // Determine if destination is memory or register - auto commaPos = substr.find(','); - bool destIsMemory = bracketStart < commaPos; - - // Find register in the other operand - std::string otherOperand; - if (destIsMemory) { - otherOperand = substr.substr(commaPos + 1); - } else { - otherOperand = substr.substr(0, commaPos); - } - - // Remove whitespace - while (!otherOperand.empty() && (otherOperand[0] == ' ' || otherOperand[0] == '\t')) { - otherOperand.erase(0, 1); - } - - // Check for register in other operand - i64_byte_t regCode = 0; - bool foundReg = false; - bool isImmediate = false; - int64_t immValue = 0; - - for (auto& reg : regs64) { - if (otherOperand.find(reg.name) != std::string::npos) { - regCode = reg.code; - foundReg = true; - break; - } - } - - if (!foundReg) { - // Check if it's an immediate value - std::string immStr = otherOperand; - while (!immStr.empty() && (immStr[0] == ' ' || immStr[0] == '\t')) { - immStr.erase(0, 1); - } - if (!immStr.empty() && (isdigit(immStr[0]) || immStr[0] == '-')) { - isImmediate = true; - immValue = strtol(immStr.c_str(), nullptr, 0); - } - } - - // Determine mod field based on displacement size - // mod=00: [reg] no displacement (except rbp which requires disp8) - // mod=01: [reg+disp8] - // mod=10: [reg+disp32] - i64_byte_t mod = 0; - if (!hasDisp && displacement == 0) { - // [rbp] requires disp8 with 0, can't use mod=00 (it means RIP-relative) - mod = isRbp ? 
0x01 : 0x00; - } else if (displacement >= -128 && displacement <= 127) { - mod = 0x01; // 8-bit displacement - } else { - mod = 0x02; // 32-bit displacement - } - - if (destIsMemory) { - if (foundReg) { - // mov [reg+n], reg - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x89); // MOV r/m64, r64 - - // ModR/M: mod | reg << 3 | r/m - i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - // RSP needs SIB byte - if (isRsp) { - kAppBytes.emplace_back(0x24); // SIB: scale=0, index=4(none), base=4(rsp) - } - } else if (isImmediate) { - // mov qword [reg+n], imm32 - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0xC7); // MOV r/m64, imm32 - - // ModR/M: mod | 0 << 3 | r/m (reg field is 0 for this opcode) - i64_byte_t modrm = (mod << 6) | (0 << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - // RSP needs SIB byte - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - } else { - CompilerKit::Detail::print_error("Invalid source operand for mov to memory.", file); - throw std::runtime_error("invalid_operand"); - } - } else { - // mov reg, [reg+n] - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x8B); // MOV r64, r/m64 - - // ModR/M: mod | reg << 3 | r/m - i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - // RSP needs SIB byte - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - } - - // Write displacement - if (mod == 0x01) { - // 8-bit displacement - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - } else if (mod == 0x02) { - // 32-bit displacement - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); - } else if (isRbp) { - // rbp with mod=00 still needs disp8=0 - kAppBytes.emplace_back(0x00); - } - - // Write immediate 
if present - if (destIsMemory && isImmediate) { - kAppBytes.emplace_back(static_cast(immValue & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); - } - - break; - } - - bool onlyOneReg = true; - - std::vector currentRegList; - - for (auto& reg : kRegisterList) { - std::string registerName; - - if (bits == 32) - registerName.push_back('e'); - else if (bits == 64) - registerName.push_back('r'); - else { - CompilerKit::Detail::print_error("Invalid size for register, current bit width is: " + - std::to_string(kRegisterBitWidth), - file); - throw std::runtime_error("invalid_reg_size"); - } - - registerName += reg.fName; - - while (line.find(registerName) != std::string::npos) { - line.erase(line.find(registerName), registerName.size()); - - if (bits == 16) { - if (registerName[0] == 'r') { - CompilerKit::Detail::print_error( - "Invalid size for register, current bit width is: " + - std::to_string(kRegisterBitWidth), - file); - throw std::runtime_error("invalid_reg_size"); - } - } - - currentRegList.push_back({.fName = registerName, .fModRM = reg.fModRM}); - } - } - - if (currentRegList.size() > 1) onlyOneReg = false; - - bool hasRBasedRegs = false; - - if (!onlyOneReg) { - /// very tricky to understand. - /// but this checks for a r8 through r15 register. 
- if (currentRegList[0].fName[0] == 'r' || currentRegList[1].fName[0] == 'r') { - if (isdigit(currentRegList[0].fName[1]) && isdigit(currentRegList[1].fName[1])) { - kAppBytes.emplace_back(0x4d); - hasRBasedRegs = true; - } else if (isdigit(currentRegList[0].fName[1]) || - isdigit(currentRegList[1].fName[1])) { - kAppBytes.emplace_back(0x4c); - hasRBasedRegs = true; - } - } - } - - if (name == "mov") { - if (bits == 64 || bits == 32) { - if (!hasRBasedRegs && bits >= 32) { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - } else if (hasRBasedRegs && bits == 32) { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - - if (!onlyOneReg) kAppBytes.emplace_back(0x89); - } else if (bits == 16) { - if (hasRBasedRegs) { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } else { - kAppBytes.emplace_back(0x66); - kAppBytes.emplace_back(0x89); - } - } - } else { - if (!hasRBasedRegs && bits >= 32) { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - } - - kAppBytes.emplace_back(0x31); - } - - if (onlyOneReg) { - auto num = GetNumber32(line, ","); - - auto modrm = (0x3 << 6 | currentRegList[0].fModRM); - - kAppBytes.emplace_back(0xC7); // prefixed before placing the modrm and then the number. 
- kAppBytes.emplace_back(modrm); - - if (name != "xor") { - kAppBytes.emplace_back(num.number[0]); - kAppBytes.emplace_back(num.number[1]); - kAppBytes.emplace_back(num.number[2]); - kAppBytes.emplace_back(num.number[3]); - } - - break; - } - - if (currentRegList[1].fName[0] == 'r' && currentRegList[0].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - - if (currentRegList[0].fName[0] == 'r' && currentRegList[1].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - - if (bits == 16) { - if (currentRegList[0].fName[0] == 'r' || currentRegList[0].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - - if (currentRegList[1].fName[0] == 'r' || currentRegList[1].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - } else { - if (currentRegList[0].fName[0] != 'r' || currentRegList[0].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - - if (currentRegList[1].fName[0] != 'r' || currentRegList[1].fName[0] == 'e') { - CompilerKit::Detail::print_error("Invalid combination of operands and registers.", - "CompilerKit"); - throw std::runtime_error("comb_op_reg"); - } - } - - /// encode register using the modrm encoding. - - auto modrm = (0x3 << 6 | currentRegList[1].fModRM << 3 | currentRegList[0].fModRM); - - kAppBytes.emplace_back(modrm); - - break; - } - - /// Compare instruction handler. 
- if (name == "cmp") { - std::string substr = line.substr(line.find(name) + name.size()); - - if (substr.find(",") == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: missing right operand.", "CompilerKit"); - throw std::runtime_error("syntax_err"); - } - - // Register lookup table - struct RegInfo { - const char* name; - i64_byte_t code; - }; - - RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, - {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; - - /// Handle [reg+n] memory addressing - if (substr.find('[') != std::string::npos) { - auto bracketStart = substr.find('['); - auto bracketEnd = substr.find(']'); - - if (bracketEnd == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: malformed memory operand.", file); - throw std::runtime_error("syntax_err"); - } - - std::string memOperand = substr.substr(bracketStart + 1, bracketEnd - bracketStart - 1); - - // Find base register - i64_byte_t baseReg = 0; - bool foundBase = false; - - for (auto& reg : regs64) { - if (memOperand.find(reg.name) != std::string::npos) { - baseReg = reg.code; - foundBase = true; - break; - } - } - - if (!foundBase) { - CompilerKit::Detail::print_error("Invalid base register in memory operand.", file); - throw std::runtime_error("invalid_base_reg"); - } - - bool isRbp = (baseReg == 5); - bool isRsp = (baseReg == 4); - - // Parse displacement - int32_t displacement = 0; - bool hasDisp = false; - - auto plusPos = memOperand.find('+'); - auto minusPos = memOperand.find('-'); - - if (plusPos != std::string::npos) { - std::string dispStr = memOperand.substr(plusPos + 1); - displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } else if (minusPos != std::string::npos) { - std::string dispStr = memOperand.substr(minusPos + 1); - displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } - - auto commaPos = substr.find(','); - bool destIsMemory = bracketStart < commaPos; - - 
std::string otherOperand; - if (destIsMemory) { - otherOperand = substr.substr(commaPos + 1); - } else { - otherOperand = substr.substr(0, commaPos); - } - - while (!otherOperand.empty() && (otherOperand[0] == ' ' || otherOperand[0] == '\t')) { - otherOperand.erase(0, 1); - } - - i64_byte_t regCode = 0; - bool foundReg = false; - bool isImmediate = false; - int64_t immValue = 0; - - for (auto& reg : regs64) { - if (otherOperand.find(reg.name) != std::string::npos) { - regCode = reg.code; - foundReg = true; - break; - } - } - - if (!foundReg) { - std::string immStr = otherOperand; - while (!immStr.empty() && (immStr[0] == ' ' || immStr[0] == '\t')) { - immStr.erase(0, 1); - } - if (!immStr.empty() && (isdigit(immStr[0]) || immStr[0] == '-')) { - isImmediate = true; - immValue = strtol(immStr.c_str(), nullptr, 0); - } - } - - // Determine mod field - i64_byte_t mod = 0; - if (!hasDisp && displacement == 0) { - mod = isRbp ? 0x01 : 0x00; - } else if (displacement >= -128 && displacement <= 127) { - mod = 0x01; - } else { - mod = 0x02; - } - - if (destIsMemory) { - if (foundReg) { - // cmp [reg+n], reg - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x39); // CMP r/m64, r64 - - i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - } else if (isImmediate) { - // cmp qword [reg+n], imm32 - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x81); // CMP r/m64, imm32 - - // reg field = 7 for CMP - i64_byte_t modrm = (mod << 6) | (7 << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - } - } else { - // cmp reg, [reg+n] - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x3B); // CMP r64, r/m64 - - i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - } - - // Write displacement - if (mod == 0x01) 
{ - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - } else if (mod == 0x02) { - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); - } else if (isRbp) { - kAppBytes.emplace_back(0x00); - } - - // Write immediate - if (destIsMemory && isImmediate) { - kAppBytes.emplace_back(static_cast(immValue & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); - } - - break; - } - - // Handle register-to-register and register-to-immediate - i64_byte_t reg1Code = 0; - i64_byte_t reg2Code = 0; - bool foundReg1 = false; - bool foundReg2 = false; - bool isImmediate = false; - int64_t immValue = 0; - - auto commaPos = substr.find(','); - std::string leftOperand = substr.substr(0, commaPos); - std::string rightOperand = substr.substr(commaPos + 1); - - while (!leftOperand.empty() && (leftOperand[0] == ' ' || leftOperand[0] == '\t')) { - leftOperand.erase(0, 1); - } - while (!rightOperand.empty() && (rightOperand[0] == ' ' || rightOperand[0] == '\t')) { - rightOperand.erase(0, 1); - } - - for (auto& reg : regs64) { - if (leftOperand.find(reg.name) != std::string::npos) { - reg1Code = reg.code; - foundReg1 = true; - break; - } - } - - for (auto& reg : regs64) { - if (rightOperand.find(reg.name) != std::string::npos) { - reg2Code = reg.code; - foundReg2 = true; - break; - } - } - - if (!foundReg2) { - if (!rightOperand.empty() && (isdigit(rightOperand[0]) || rightOperand[0] == '-')) { - isImmediate = true; - immValue = strtol(rightOperand.c_str(), nullptr, 0); - } - } - - if (foundReg1 && foundReg2) { - // cmp reg1, reg2 - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x39); // CMP r/m64, r64 - - i64_byte_t 
modrm = (0x3 << 6) | (reg2Code << 3) | reg1Code; - kAppBytes.emplace_back(modrm); - } else if (foundReg1 && isImmediate) { - // cmp reg, imm - kAppBytes.emplace_back(0x48); // REX.W - kAppBytes.emplace_back(0x81); // CMP r/m64, imm32 - - // reg field = 7 for CMP - i64_byte_t modrm = (0x3 << 6) | (7 << 3) | reg1Code; - kAppBytes.emplace_back(modrm); - - kAppBytes.emplace_back(static_cast(immValue & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); - } else { - CompilerKit::Detail::print_error("Invalid operands for cmp instruction.", file); - throw std::runtime_error("invalid_cmp_operands"); - } - - break; - } - - /// LEA instruction handler. - if (name == "lea") { - std::string substr = line.substr(line.find(name) + name.size()); - - // Remove leading whitespace - while (!substr.empty() && (substr[0] == ' ' || substr[0] == '\t')) { - substr.erase(0, 1); - } - - if (substr.find(",") == std::string::npos || substr.find('[') == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: lea requires reg, [mem] format.", file); - throw std::runtime_error("syntax_err"); - } - - // Register lookup table - struct RegInfo { - const char* name; - i64_byte_t code; - }; - - RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, - {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; - - auto commaPos = substr.find(','); - std::string destOperand = substr.substr(0, commaPos); - std::string srcOperand = substr.substr(commaPos + 1); - - // Remove whitespace - while (!destOperand.empty() && (destOperand[0] == ' ' || destOperand[0] == '\t')) { - destOperand.erase(0, 1); - } - while (!srcOperand.empty() && (srcOperand[0] == ' ' || srcOperand[0] == '\t')) { - srcOperand.erase(0, 1); - } - - // Find destination register - i64_byte_t destReg = 0; - bool foundDest = false; - - for (auto& reg : regs64) { - if 
(destOperand.find(reg.name) != std::string::npos) { - destReg = reg.code; - foundDest = true; - break; - } - } - - if (!foundDest) { - CompilerKit::Detail::print_error("Invalid destination register for lea.", file); - throw std::runtime_error("invalid_dest_reg"); - } - - // Parse memory operand [base+disp] or [base-disp] - auto bracketStart = srcOperand.find('['); - auto bracketEnd = srcOperand.find(']'); - - if (bracketStart == std::string::npos || bracketEnd == std::string::npos) { - CompilerKit::Detail::print_error("Syntax error: malformed memory operand for lea.", - file); - throw std::runtime_error("syntax_err"); - } - - std::string memOperand = - srcOperand.substr(bracketStart + 1, bracketEnd - bracketStart - 1); - - // Find base register - i64_byte_t baseReg = 0; - bool foundBase = false; - - for (auto& reg : regs64) { - if (memOperand.find(reg.name) != std::string::npos) { - baseReg = reg.code; - foundBase = true; - break; - } - } - - if (!foundBase) { - CompilerKit::Detail::print_error("Invalid base register in memory operand for lea.", - file); - throw std::runtime_error("invalid_base_reg"); - } - - bool isRbp = (baseReg == 5); - bool isRsp = (baseReg == 4); - int32_t displacement = 0; - bool hasDisp = false; - - // Look for +/- displacement - auto plusPos = memOperand.find('+'); - auto minusPos = memOperand.find('-'); - - if (plusPos != std::string::npos) { - std::string dispStr = memOperand.substr(plusPos + 1); - displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } else if (minusPos != std::string::npos) { - std::string dispStr = memOperand.substr(minusPos + 1); - displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); - hasDisp = true; - } - - // Determine mod field - i64_byte_t mod = 0x00; - if (hasDisp || isRbp) { - if (displacement >= -128 && displacement <= 127) { - mod = 0x01; // 8-bit displacement - } else { - mod = 0x02; // 32-bit displacement - } - } - - // Emit REX.W prefix for 64-bit - 
kAppBytes.emplace_back(0x48); - - // Emit LEA opcode - kAppBytes.emplace_back(0x8D); - - // Emit ModR/M byte - i64_byte_t modrm = (mod << 6) | (destReg << 3) | baseReg; - kAppBytes.emplace_back(modrm); - - // RSP needs SIB byte - if (isRsp) { - kAppBytes.emplace_back(0x24); - } - - // Emit displacement - if (mod == 0x01) { - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - } else if (mod == 0x02) { - kAppBytes.emplace_back(static_cast(displacement & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); - kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); - } else if (isRbp) { - // RBP with no displacement needs [rbp+0] - kAppBytes.emplace_back(0x00); - } - - break; - } - - /// Push instruction handler. - if (name == "push" || name == "pop") { - std::string substr = line.substr(line.find(name) + name.size()); - - // Remove leading whitespace - while (!substr.empty() && (substr[0] == ' ' || substr[0] == '\t')) { - substr.erase(0, 1); - } - - i64_byte_t baseOpcode = (name == "push") ? 
kAsmPushOpcode : kAsmPopOpcode; - bool found = false; - - // Check for extended registers r8-r15 - if (substr.size() >= 2 && substr[0] == 'r' && isdigit(substr[1])) { - int regNum = 0; - - if (substr.size() >= 3 && isdigit(substr[2])) { - regNum = (substr[1] - '0') * 10 + (substr[2] - '0'); - } else { - regNum = substr[1] - '0'; - } - - if (regNum >= 8 && regNum <= 15) { - // REX.B prefix for r8-r15 - kAppBytes.emplace_back(0x41); - kAppBytes.emplace_back(baseOpcode + (regNum - 8)); - found = true; - } - } - - // Check for standard 64-bit registers rax-rdi - if (!found) { - struct RegPushPop { - const char* name; - i64_byte_t offset; - }; - - RegPushPop regs[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, - {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; - - for (auto& reg : regs) { - if (substr.find(reg.name) != std::string::npos) { - kAppBytes.emplace_back(baseOpcode + reg.offset); - found = true; - break; - } - } - } - - if (!found) { - if (isnumber(substr[0])) { - kAppBytes.emplace_back(name == "push" ? 
0x68 : 0x8F); - - // push imm always takes a 32-bit immediate (sign-extended in 64-bit mode) - // Parse the immediate value without adding kOrigin - long imm = 0; - if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'x') { - imm = strtol(substr.c_str() + 2, nullptr, 16); - } else if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'b') { - imm = strtol(substr.c_str() + 2, nullptr, 2); - } else if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'o') { - imm = strtol(substr.c_str() + 2, nullptr, 8); - } else { - imm = strtol(substr.c_str(), nullptr, 10); - } - - CompilerKit::NumberCast32 num(imm); - if (kRegisterBitWidth == 64 || kRegisterBitWidth == 32) { - kAppBytes.emplace_back(num.number[0]); - kAppBytes.emplace_back(num.number[1]); - kAppBytes.emplace_back(num.number[2]); - kAppBytes.emplace_back(num.number[3]); - } else if (kRegisterBitWidth == 16) { - kAppBytes.emplace_back(num.number[0]); - kAppBytes.emplace_back(num.number[1]); - } - - break; - } - - CompilerKit::Detail::print_error("Invalid operand for " + name + ": " + substr, - "CompilerKit"); - throw std::runtime_error("invalid_push_pop_operand"); - } - - break; - } - } - - if (name == "int" || name == "into" || name == "intd") { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - this->WriteNumber8(line.find(name) + name.size() + 1, line); - - break; - } else if (name == "jmp" || name == "call") { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - - if (auto it = std::find(kUndefinedSymbols.begin(), kUndefinedSymbols.end(), name); - it != kUndefinedSymbols.end()) { - auto number_str = std::to_string(kOrigin + name.size()); - this->WriteNumber(0, number_str); - kOrigin += name.size(); - } - - if (kRegisterBitWidth == 64) { - this->WriteNumber(line.find(name) + name.size() + 1, line); - } else { - this->WriteNumber32(line.find(name) + name.size() + 1, line); - } - - break; - } - - if (name == "syscall") { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - kAppBytes.emplace_back(0x05); - 
break; - } else { - kAppBytes.emplace_back(opcodeAMD64.fOpcode); - - break; - } - } - } - - if (line[0] == kAssemblerPragmaSym) { - if (foundInstruction) { - CompilerKit::Detail::print_error("Syntax error: " + line, file); - throw std::runtime_error("syntax_err"); - } - - if (line.find("bits 64") != std::string::npos) { - kRegisterBitWidth = 64U; - } else if (line.find("bits 32") != std::string::npos) { - kRegisterBitWidth = 32U; - } else if (line.find("bits 16") != std::string::npos) { - kRegisterBitWidth = 16U; - } - - if (auto org_pos = line.find("org"); org_pos != std::string::npos) { - auto value_pos = org_pos + strlen("org") + 1; - - if (value_pos >= line.size()) { - CompilerKit::Detail::print_error("Invalid org directive", "CompilerKit"); - throw std::runtime_error("invalid_org"); - } - - size_t base[] = {10, 16, 2, 8}; - - for (size_t i = 0; i < 4; i++) { - if (kOrigin = strtol(line.substr(value_pos).c_str(), nullptr, base[i]); kOrigin) { - if (errno != 0) { - continue; - } else { - if (kVerbose) { - kStdOut << "AssemblerAMD64: Origin Set: " << kOrigin << std::endl; - } - - break; - } - } - } - } - } - /// write a dword - else if (auto pos = line.find(".dword"); pos != std::string::npos) { - this->WriteNumber32(pos + strlen(".dword") + 1, line); - } - /// write a long - else if (auto pos = line.find(".long"); pos != std::string::npos) { - this->WriteNumber(pos + strlen(".long") + 1, line); - } - /// write a 16-bit number - else if (auto pos = line.find(".word"); pos != std::string::npos) { - this->WriteNumber16(pos + strlen(".word") + 1, line); - } - - kOrigin += kIPAlignement; - - return true; -} - -// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+AMD64.cpp b/src/CompilerKit/src/Assemblers/Assembler+AMD64.cpp new file mode 100644 index 0000000..fcd8c3e --- /dev/null +++ b/src/CompilerKit/src/Assemblers/Assembler+AMD64.cpp @@ -0,0 +1,1848 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the 
Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @file Assembler+AMD64.cc +/// @author Amlal El Mahrouss +/// @brief AMD64 Assembler. +/// REMINDER: when dealing with an undefined symbol use (string +/// size):LinkerFindSymbol:(string) so that ld will look for it. + +///////////////////////////////////////////////////////////////////////////////////////// + +/// BUG: 0 + +/// Feature request: 1 +/// Encode registers in mov, add, xor... + +///////////////////////////////////////////////////////////////////////////////////////// + +#ifndef __ASM_NEED_AMD64__ +#define __ASM_NEED_AMD64__ +#endif + +#define kAssemblerPragmaSymStr "%" +#define kAssemblerPragmaSym '%' + +#include +#include +#include +#include +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +#define kBlank "\e[0;30m" +#define kRed "\e[0;31m" +#define kWhite "\e[0;97m" +#define kYellow "\e[0;33m" + +static char kOutputArch = CompilerKit::kPefArchAMD64; + +static constexpr auto kIPAlignement = 0x1U; +static auto kCounter = 0x1UL; + +static std::uintptr_t kOrigin = kPefBaseOrigin; +static std::vector> kOriginLabel; + +/// @brief keep it simple by default. +static std::int32_t kRegisterBitWidth = 16U; + +static std::vector kAppBytes; + +static CompilerKit::AERecordHeader kCurrentRecord{ + .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; + +static std::vector kRecords; +static std::vector kDefinedSymbols; +static std::vector kUndefinedSymbols; + +static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; + +// \brief forward decl. 
+static bool asm_read_attributes(std::string line); + +#include + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief AMD64 assembler entrypoint, the program/module starts here. + +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(AssemblerMainAMD64) { + //////////////// CPU OPCODES BEGIN //////////////// + + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + std::string opcodes_jump[kJumpLimit] = {"ja", "jae", "jb", "jbe", "jc", "je", "jg", "jge", + "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", + "jng", "jnge", "jnl", "jnle", "jno", "jnp", "jns", "jnz", + "jo", "jp", "jpe", "jpo", "js", "jz"}; + + for (i64_hword_t i = 0; i < kJumpLimit; i++) { + CpuOpcodeAMD64 code{.fName = opcodes_jump[i], + .fOpcode = static_cast(kAsmJumpOpcode + i)}; + kOpcodesAMD64.push_back(code); + } + + CpuOpcodeAMD64 code{.fName = "jcxz", .fOpcode = 0xE3}; + kOpcodesAMD64.push_back(code); + + for (i64_hword_t i = kJumpLimitStandard; i < kJumpLimitStandardLimit; i++) { + CpuOpcodeAMD64 code{.fName = "jmp", .fOpcode = i}; + kOpcodesAMD64.push_back(code); + } + + CpuOpcodeAMD64 lahf{.fName = "lahf", .fOpcode = 0x9F}; + kOpcodesAMD64.push_back(lahf); + + CpuOpcodeAMD64 lds{.fName = "lds", .fOpcode = 0xC5}; + kOpcodesAMD64.push_back(lds); + + CpuOpcodeAMD64 lea{.fName = "lea", .fOpcode = 0x8D}; + kOpcodesAMD64.push_back(lea); + + CpuOpcodeAMD64 nop{.fName = "nop", .fOpcode = 0x90}; + kOpcodesAMD64.push_back(nop); + + //////////////// CPU OPCODES END //////////////// + + for (size_t i = 1; i < argc; ++i) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { + kStdOut + << "AssemblerAMD64: AMD64 Assembler Driver.\nAssemblerAMD64: Copyright (c) 2024-2026 " + "Amlal El Mahrouss\n"; + kStdOut + << "AssemblerAMD64: This Software is part of the NeKernel project. 
(nekernel.org)\n"; + return 0; + } else if (strcmp(argv[i], "-help") == 0) { + kStdOut + << "AssemblerAMD64: AMD64 Assembler Driver.\nAssemblerAMD64: Copyright (c) 2024-2026 " + "Amlal El Mahrouss\n"; + kStdOut + << "AssemblerAMD64: This Software is part of the NeKernel project. (nekernel.org)\n"; + kStdOut << "-version: Print program version.\n"; + kStdOut << "-fverbose: Print verbose output.\n"; + kStdOut << "-fbinary: Output as flat binary.\n"; + + return 0; + } else if (strcmp(argv[i], "-fbinary") == 0) { + kOutputAsBinary = true; + continue; + } else if (strcmp(argv[i], "-fverbose") == 0) { + kVerbose = true; + continue; + } + + kStdOut << "AssemblerAMD64: ignore " << argv[i] << "\n"; + continue; + } + + if (!std::filesystem::exists(argv[i])) { + kStdOut << "AssemblerAMD64: can't open: " << argv[i] << std::endl; + goto asm_fail_exit; + } + + std::string object_output(argv[i]); + std::string asm_input(argv[i]); + + for (auto& ext : kAsmFileExts) { + if (object_output.ends_with(ext)) { + object_output.erase(object_output.find(ext), std::strlen(ext)); + break; + } + } + + object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; + + std::ifstream file_ptr(argv[i]); + std::ofstream file_ptr_out(object_output, std::ofstream::binary); + + kStdOut << "AssemblerAMD64: Assembling: " << argv[i] << "\n"; + + if (file_ptr_out.bad()) { + if (kVerbose) { + kStdOut << "AssemblerAMD64: error: " << strerror(errno) << "\n"; + } + + return 1; + } + + std::string line; + + CompilerKit::AEHeader hdr{0}; + + memset(hdr.fPad, kAENullType, kAEPad); + + hdr.fMagic[0] = kAEMag0; + hdr.fMagic[1] = kAEMag1; + hdr.fMagic[2] = kAEMag2; + hdr.fSize = sizeof(CompilerKit::AEHeader); + hdr.fArch = kOutputArch; + + ///////////////////////////////////////////////////////////////////////////////////////// + + // COMPILATION LOOP + + ///////////////////////////////////////////////////////////////////////////////////////// + + CompilerKit::EncoderAMD64 asm64; + + if (kVerbose) { + kStdOut << "Compiling: " + asm_input << "\n"; + } + + while (std::getline(file_ptr, line)) { + try { + if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { + CompilerKit::Detail::print_error(ln, argv[i]); + continue; + } + + asm_read_attributes(line); + asm64.WriteLine(line, argv[i]); + } catch (const std::exception& e) { + if (kVerbose) { + std::string what = e.what(); + CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); + } + + try { + std::filesystem::remove(object_output); + } catch (...) { + } + + goto asm_fail_exit; + } + } + + if (!kOutputAsBinary) { + if (kVerbose) { + kStdOut << "AssemblerAMD64: Writing object file...\n"; + } + + // this is the final step, write everything to the file. 
+ + auto pos = file_ptr_out.tellp(); + + hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); + + file_ptr_out << hdr; + + if (kRecords.empty()) { + kStdErr << "AssemblerAMD64: At least one record is needed to write an object " + "file.\nAssemblerAMD64: Make one using `public_segment .code64 foo_bar`.\n"; + + std::filesystem::remove(object_output); + return 1; + } + + kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); + + std::size_t record_count = 0UL; + + for (auto& rec : kRecords) { + if (kVerbose) kStdOut << "AssemblerAMD64: Wrote record " << rec.fName << " to file...\n"; + + rec.fFlags |= CompilerKit::kKindRelocationAtRuntime; + rec.fOffset = record_count; + ++record_count; + + file_ptr_out << rec; + } + + // increment once again, so that we won't lie about the kUndefinedSymbols. + ++record_count; + + for (auto& sym : kUndefinedSymbols) { + CompilerKit::AERecordHeader undefined_sym{0}; + + if (kVerbose) kStdOut << "AssemblerAMD64: Wrote symbol " << sym << " to file...\n"; + + undefined_sym.fKind = CompilerKit::kKindRelocationAtRuntime; + undefined_sym.fSize = sym.size(); + undefined_sym.fOffset = record_count; + + ++record_count; + + memset(undefined_sym.fPad, kAENullType, kAEPad); + memcpy(undefined_sym.fName, sym.c_str(), sym.size()); + + file_ptr_out << undefined_sym; + + ++kCounter; + } + + auto pos_end = file_ptr_out.tellp(); + + file_ptr_out.seekp(pos); + + hdr.fStartCode = pos_end; + hdr.fCodeSize = kAppBytes.size(); + + file_ptr_out << hdr; + + file_ptr_out.seekp(pos_end); + } else { + if (kVerbose) { + kStdOut << "AssemblerAMD64: Write raw binary...\n"; + } + } + + // byte from byte, we write this. 
+ for (auto& byte : kAppBytes) { + file_ptr_out << reinterpret_cast(&byte)[0]; + } + + if (kVerbose) kStdOut << "AssemblerAMD64: Wrote file with program in it.\n"; + + file_ptr_out.flush(); + file_ptr_out.close(); + + if (kVerbose) kStdOut << "AssemblerAMD64: Exit succeeded.\n"; + + return 0; + } + +asm_fail_exit: + + if (kVerbose) kStdOut << "AssemblerAMD64: Exit failed.\n"; + + return 1; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for attributes +// returns true if any was found. + +///////////////////////////////////////////////////////////////////////////////////////// + +static bool asm_read_attributes(std::string line) { + // extern_segment is the opposite of public_segment, it signals to the ld + // that we need this symbol. + if (CompilerKit::ast_find_needle(line, "extern_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid directive in flat binary mode.", "CompilerKit"); + throw std::runtime_error("invalid_extern_segment_bin"); + } + + auto pos = line.find("extern_segment"); + auto name_pos = pos + strlen("extern_segment") + 1; + + if (pos == std::string::npos || name_pos >= line.size()) { + CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); + throw std::runtime_error("invalid_extern_segment"); + } + + auto name = line.substr(name_pos); + + if (name.size() == 0) { + CompilerKit::Detail::print_error("Invalid extern_segment", "Nectar"); + throw std::runtime_error("invalid_extern_segment"); + } + + kUndefinedSymbols.push_back(name); + + std::string result = std::to_string(name.size()); + result += kUndefinedSymbol; + + // mangle this + for (char& j : name) { + if (j == ' ' || j == ',') j = '$'; + } + + result += name; + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. 
+ kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. + kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, result.c_str(), result.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + // public_segment is a special keyword used by AssemblerAMD64 to tell the AE output stage to + // mark this section as a header. it currently supports .code64, .data64 and + // .zero64. + else if (CompilerKit::ast_find_needle(line, "public_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid directive in flat binary mode.", "CompilerKit"); + throw std::runtime_error("invalid_public_segment_bin"); + } + + auto res_sym_at = (line.find("public_segment") + strlen("public_segment") + 1); + if (res_sym_at > line.size()) { + CompilerKit::Detail::print_error("Invalid symbol for public_segment.", "CompilerKit"); + throw std::runtime_error("invalid_public_segment_symbol"); + } + + auto name = line.substr(res_sym_at); + + std::string name_copy = name; + + for (char& j : name) { + if (j == ' ') j = '$'; + } + + if (std::find(kDefinedSymbols.begin(), kDefinedSymbols.end(), name) != kDefinedSymbols.end()) { + CompilerKit::Detail::print_error("Symbol already defined.", "CompilerKit"); + throw std::runtime_error("invalid_public_segment_bin"); + } + + kDefinedSymbols.push_back(name); + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. 
+ kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. + kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); + + kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); + ++kOrigin; + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kAppBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, name.c_str(), name.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + return false; +} + +// \brief algorithms and helpers. + +namespace CompilerKit::Detail::Algorithm { + +// \brief authorize a brief set of characters. 
+static inline bool is_not_valid(char c) { + if ((isalpha(c) || isdigit(c)) || + ((c == ' ') || (c == '\t') || (c == ',') || (c == '(') || (c == ')') || (c == '"') || + (c == '*') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || (c == '_') || + (c == ':') || (c == '@') || (c == '.') || (c == '#') || (c == '%') || (c == '~') || + (c == ';'))) + return false; + + return true; +} + +bool is_valid_amd64(std::string str) { + return std::find_if(str.begin(), str.end(), is_not_valid) == str.end(); +} + +} // namespace CompilerKit::Detail::Algorithm + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for line (syntax check) + +///////////////////////////////////////////////////////////////////////////////////////// + +std::string CompilerKit::EncoderAMD64::CheckLine(std::string line, std::string file) { + std::string err_str; + + if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || + CompilerKit::ast_find_needle(line, "public_segment") || + CompilerKit::ast_find_needle(line, kAssemblerPragmaSymStr) || + CompilerKit::ast_find_needle(line, ";") || line[0] == kAssemblerPragmaSym) { + if (line.find(';') != std::string::npos) { + line.erase(line.find(';')); + } else { + // now check the line for validity + if (!CompilerKit::Detail::Algorithm::is_valid_amd64(line)) { + err_str = "Line contains non valid characters.\nhere -> "; + err_str += line; + } + } + + return err_str; + } + + // check for a valid instruction format. 
+ + if (line.find(',') != std::string::npos) { + if (line.find(',') + 1 == line.size()) { + err_str += "\nInstruction lacks right register, here -> "; + err_str += line.substr(line.find(',')); + + return err_str; + } else { + bool nothing_on_right = true; + + if (line.find(',') + 1 > line.size()) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + + auto substr = line.substr(line.find(',') + 1); + + for (auto& ch : substr) { + if (ch != ' ' && ch != '\t') { + nothing_on_right = false; + } + } + + // this means we found nothing after that ',' . + if (nothing_on_right) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + } + } + for (auto& opcodeAMD64 : kOpcodesAMD64) { + if (CompilerKit::ast_find_needle(line, opcodeAMD64.fName)) { + return err_str; + } + } + + err_str += "\nUnrecognized instruction -> " + line; + + return err_str; +} + +/// @brief Read and write a number to the output array. +bool CompilerKit::EncoderAMD64::WriteNumber(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) + << "\n"; + } + + return true; + } + case 'b': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + } + case 'o': + case '0': { 
+ auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + } + default: { + break; + } + } + + /// @note We assume base 10 here. As other cases have failed. + auto res = strtol(jump_label.substr(pos + 1).c_str(), nullptr, 10); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast64 num = CompilerKit::NumberCast64(res); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos + 1) + << "\n"; + } + + return true; +} + +bool CompilerKit::EncoderAMD64::WriteNumber32(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) + << "\n"; + } + + return true; + } + case 'b': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + } + case 'o': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast32 
num = CompilerKit::NumberCast32(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + } + default: { + break; + } + } + + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 10); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast32 num = CompilerKit::NumberCast32(res); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +bool CompilerKit::EncoderAMD64::WriteNumber16(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("Invalid hex number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_hex"); + } + } + + CompilerKit::NumberCast16 num = + CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) + << "\n"; + } + + return true; + } + case 'b': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("Invalid binary number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_bin"); + } + } + + CompilerKit::NumberCast16 num = + CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + 
} + case 'o': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("Invalid octal number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_octal"); + } + } + + CompilerKit::NumberCast16 num = + CompilerKit::NumberCast16(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8)); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + return true; + } + default: { + break; + } + } + + /* check for errno and stuff like that */ + if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { + if (errno != 0) { + return false; + } + } + + CompilerKit::NumberCast16 num = + CompilerKit::NumberCast16(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); + + for (char& i : num.number) { + kAppBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +bool CompilerKit::EncoderAMD64::WriteNumber8(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); + + kAppBytes.push_back(num.number); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 16 number here: " << jump_label.substr(pos) + << "\n"; + } + + return true; + } + case 'b': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + 
kAppBytes.push_back(num.number); + + return true; + } + case 'o': { + auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 8); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + kAppBytes.push_back(num.number); + + return true; + } + default: { + break; + } + } + + auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); + + if (errno != 0) { + return false; + } + + CompilerKit::NumberCast8 num = CompilerKit::NumberCast8(res); + + kAppBytes.push_back(num.number); + + if (kVerbose) { + kStdOut << "AssemblerAMD64: Found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Read and write an instruction to the output array. + +///////////////////////////////////////////////////////////////////////////////////////// + +bool CompilerKit::EncoderAMD64::WriteLine(std::string line, std::string file) { + if (CompilerKit::ast_find_needle(line, "public_segment ")) return true; + + struct RegMapAMD64 { + CompilerKit::STLString fName; + i64_byte_t fModRM; + }; + + std::vector kRegisterList{ + {.fName = "ax", .fModRM = 0x0}, {.fName = "cx", .fModRM = 1}, + {.fName = "dx", .fModRM = 0x2}, {.fName = "bx", .fModRM = 3}, + {.fName = "sp", .fModRM = 0x4}, {.fName = "bp", .fModRM = 5}, + {.fName = "si", .fModRM = 0x6}, {.fName = "di", .fModRM = 7}, + }; + + bool foundInstruction = false; + + for (auto& opcodeAMD64 : kOpcodesAMD64) { + // strict check here + if (CompilerKit::ast_find_needle(line, opcodeAMD64.fName) && + CompilerKit::Detail::Algorithm::is_valid_amd64(line)) { + foundInstruction = true; + std::string name(opcodeAMD64.fName); + + /// Move instruction handler. 
+ if (line.find(name) != std::string::npos) { + if (name == "mov" || name == "xor") { + std::string substr = line.substr(line.find(name) + name.size()); + + uint64_t bits = kRegisterBitWidth; + + if (substr.find(",") == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: missing right operand.", "CompilerKit"); + throw std::runtime_error("syntax_err"); + } + + /// Handle [reg+n] or [reg-n] memory addressing for any register + if (substr.find('[') != std::string::npos) { + // Parse the memory operand + auto bracketStart = substr.find('['); + auto bracketEnd = substr.find(']'); + + if (bracketStart == std::string::npos || bracketEnd == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: malformed memory operand.", file); + throw std::runtime_error("syntax_err"); + } + + std::string memOperand = substr.substr(bracketStart + 1, bracketEnd - bracketStart - 1); + + // Register lookup table + struct RegInfo { + const char* name; + i64_byte_t code; + }; + + RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, + {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; + + // Find base register in memory operand + i64_byte_t baseReg = 0; + bool foundBase = false; + + for (auto& reg : regs64) { + if (memOperand.find(reg.name) != std::string::npos) { + baseReg = reg.code; + foundBase = true; + break; + } + } + + if (!foundBase) { + CompilerKit::Detail::print_error("Invalid base register in memory operand.", file); + throw std::runtime_error("invalid_base_reg"); + } + + bool isRbp = (baseReg == 5); + bool isRsp = (baseReg == 4); + + // Parse displacement + int32_t displacement = 0; + bool hasDisp = false; + + auto plusPos = memOperand.find('+'); + auto minusPos = memOperand.find('-'); + + if (plusPos != std::string::npos) { + std::string dispStr = memOperand.substr(plusPos + 1); + displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } else if (minusPos != std::string::npos) { + std::string dispStr = 
memOperand.substr(minusPos + 1); + displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } + + // Determine if destination is memory or register + auto commaPos = substr.find(','); + bool destIsMemory = bracketStart < commaPos; + + // Find register in the other operand + std::string otherOperand; + if (destIsMemory) { + otherOperand = substr.substr(commaPos + 1); + } else { + otherOperand = substr.substr(0, commaPos); + } + + // Remove whitespace + while (!otherOperand.empty() && (otherOperand[0] == ' ' || otherOperand[0] == '\t')) { + otherOperand.erase(0, 1); + } + + // Check for register in other operand + i64_byte_t regCode = 0; + bool foundReg = false; + bool isImmediate = false; + int64_t immValue = 0; + + for (auto& reg : regs64) { + if (otherOperand.find(reg.name) != std::string::npos) { + regCode = reg.code; + foundReg = true; + break; + } + } + + if (!foundReg) { + // Check if it's an immediate value + std::string immStr = otherOperand; + while (!immStr.empty() && (immStr[0] == ' ' || immStr[0] == '\t')) { + immStr.erase(0, 1); + } + if (!immStr.empty() && (isdigit(immStr[0]) || immStr[0] == '-')) { + isImmediate = true; + immValue = strtol(immStr.c_str(), nullptr, 0); + } + } + + // Determine mod field based on displacement size + // mod=00: [reg] no displacement (except rbp which requires disp8) + // mod=01: [reg+disp8] + // mod=10: [reg+disp32] + i64_byte_t mod = 0; + if (!hasDisp && displacement == 0) { + // [rbp] requires disp8 with 0, can't use mod=00 (it means RIP-relative) + mod = isRbp ? 
0x01 : 0x00; + } else if (displacement >= -128 && displacement <= 127) { + mod = 0x01; // 8-bit displacement + } else { + mod = 0x02; // 32-bit displacement + } + + if (destIsMemory) { + if (foundReg) { + // mov [reg+n], reg + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x89); // MOV r/m64, r64 + + // ModR/M: mod | reg << 3 | r/m + i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + // RSP needs SIB byte + if (isRsp) { + kAppBytes.emplace_back(0x24); // SIB: scale=0, index=4(none), base=4(rsp) + } + } else if (isImmediate) { + // mov qword [reg+n], imm32 + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0xC7); // MOV r/m64, imm32 + + // ModR/M: mod | 0 << 3 | r/m (reg field is 0 for this opcode) + i64_byte_t modrm = (mod << 6) | (0 << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + // RSP needs SIB byte + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + } else { + CompilerKit::Detail::print_error("Invalid source operand for mov to memory.", file); + throw std::runtime_error("invalid_operand"); + } + } else { + // mov reg, [reg+n] + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x8B); // MOV r64, r/m64 + + // ModR/M: mod | reg << 3 | r/m + i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + // RSP needs SIB byte + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + } + + // Write displacement + if (mod == 0x01) { + // 8-bit displacement + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + } else if (mod == 0x02) { + // 32-bit displacement + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); + } else if (isRbp) { + // rbp with mod=00 still needs disp8=0 + kAppBytes.emplace_back(0x00); + } + + // Write immediate 
if present + if (destIsMemory && isImmediate) { + kAppBytes.emplace_back(static_cast(immValue & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); + } + + break; + } + + bool onlyOneReg = true; + + std::vector currentRegList; + + for (auto& reg : kRegisterList) { + std::string registerName; + + if (bits == 32) + registerName.push_back('e'); + else if (bits == 64) + registerName.push_back('r'); + else { + CompilerKit::Detail::print_error("Invalid size for register, current bit width is: " + + std::to_string(kRegisterBitWidth), + file); + throw std::runtime_error("invalid_reg_size"); + } + + registerName += reg.fName; + + while (line.find(registerName) != std::string::npos) { + line.erase(line.find(registerName), registerName.size()); + + if (bits == 16) { + if (registerName[0] == 'r') { + CompilerKit::Detail::print_error( + "Invalid size for register, current bit width is: " + + std::to_string(kRegisterBitWidth), + file); + throw std::runtime_error("invalid_reg_size"); + } + } + + currentRegList.push_back({.fName = registerName, .fModRM = reg.fModRM}); + } + } + + if (currentRegList.size() > 1) onlyOneReg = false; + + bool hasRBasedRegs = false; + + if (!onlyOneReg) { + /// very tricky to understand. + /// but this checks for a r8 through r15 register. 
+ if (currentRegList[0].fName[0] == 'r' || currentRegList[1].fName[0] == 'r') { + if (isdigit(currentRegList[0].fName[1]) && isdigit(currentRegList[1].fName[1])) { + kAppBytes.emplace_back(0x4d); + hasRBasedRegs = true; + } else if (isdigit(currentRegList[0].fName[1]) || + isdigit(currentRegList[1].fName[1])) { + kAppBytes.emplace_back(0x4c); + hasRBasedRegs = true; + } + } + } + + if (name == "mov") { + if (bits == 64 || bits == 32) { + if (!hasRBasedRegs && bits >= 32) { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + } else if (hasRBasedRegs && bits == 32) { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + + if (!onlyOneReg) kAppBytes.emplace_back(0x89); + } else if (bits == 16) { + if (hasRBasedRegs) { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } else { + kAppBytes.emplace_back(0x66); + kAppBytes.emplace_back(0x89); + } + } + } else { + if (!hasRBasedRegs && bits >= 32) { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + } + + kAppBytes.emplace_back(0x31); + } + + if (onlyOneReg) { + auto num = GetNumber32(line, ","); + + auto modrm = (0x3 << 6 | currentRegList[0].fModRM); + + kAppBytes.emplace_back(0xC7); // prefixed before placing the modrm and then the number. 
+ kAppBytes.emplace_back(modrm); + + if (name != "xor") { + kAppBytes.emplace_back(num.number[0]); + kAppBytes.emplace_back(num.number[1]); + kAppBytes.emplace_back(num.number[2]); + kAppBytes.emplace_back(num.number[3]); + } + + break; + } + + if (currentRegList[1].fName[0] == 'r' && currentRegList[0].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + + if (currentRegList[0].fName[0] == 'r' && currentRegList[1].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + + if (bits == 16) { + if (currentRegList[0].fName[0] == 'r' || currentRegList[0].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + + if (currentRegList[1].fName[0] == 'r' || currentRegList[1].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + } else { + if (currentRegList[0].fName[0] != 'r' || currentRegList[0].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + + if (currentRegList[1].fName[0] != 'r' || currentRegList[1].fName[0] == 'e') { + CompilerKit::Detail::print_error("Invalid combination of operands and registers.", + "CompilerKit"); + throw std::runtime_error("comb_op_reg"); + } + } + + /// encode register using the modrm encoding. + + auto modrm = (0x3 << 6 | currentRegList[1].fModRM << 3 | currentRegList[0].fModRM); + + kAppBytes.emplace_back(modrm); + + break; + } + + /// Compare instruction handler. 
+ if (name == "cmp") { + std::string substr = line.substr(line.find(name) + name.size()); + + if (substr.find(",") == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: missing right operand.", "CompilerKit"); + throw std::runtime_error("syntax_err"); + } + + // Register lookup table + struct RegInfo { + const char* name; + i64_byte_t code; + }; + + RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, + {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; + + /// Handle [reg+n] memory addressing + if (substr.find('[') != std::string::npos) { + auto bracketStart = substr.find('['); + auto bracketEnd = substr.find(']'); + + if (bracketEnd == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: malformed memory operand.", file); + throw std::runtime_error("syntax_err"); + } + + std::string memOperand = substr.substr(bracketStart + 1, bracketEnd - bracketStart - 1); + + // Find base register + i64_byte_t baseReg = 0; + bool foundBase = false; + + for (auto& reg : regs64) { + if (memOperand.find(reg.name) != std::string::npos) { + baseReg = reg.code; + foundBase = true; + break; + } + } + + if (!foundBase) { + CompilerKit::Detail::print_error("Invalid base register in memory operand.", file); + throw std::runtime_error("invalid_base_reg"); + } + + bool isRbp = (baseReg == 5); + bool isRsp = (baseReg == 4); + + // Parse displacement + int32_t displacement = 0; + bool hasDisp = false; + + auto plusPos = memOperand.find('+'); + auto minusPos = memOperand.find('-'); + + if (plusPos != std::string::npos) { + std::string dispStr = memOperand.substr(plusPos + 1); + displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } else if (minusPos != std::string::npos) { + std::string dispStr = memOperand.substr(minusPos + 1); + displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } + + auto commaPos = substr.find(','); + bool destIsMemory = bracketStart < commaPos; + + 
std::string otherOperand; + if (destIsMemory) { + otherOperand = substr.substr(commaPos + 1); + } else { + otherOperand = substr.substr(0, commaPos); + } + + while (!otherOperand.empty() && (otherOperand[0] == ' ' || otherOperand[0] == '\t')) { + otherOperand.erase(0, 1); + } + + i64_byte_t regCode = 0; + bool foundReg = false; + bool isImmediate = false; + int64_t immValue = 0; + + for (auto& reg : regs64) { + if (otherOperand.find(reg.name) != std::string::npos) { + regCode = reg.code; + foundReg = true; + break; + } + } + + if (!foundReg) { + std::string immStr = otherOperand; + while (!immStr.empty() && (immStr[0] == ' ' || immStr[0] == '\t')) { + immStr.erase(0, 1); + } + if (!immStr.empty() && (isdigit(immStr[0]) || immStr[0] == '-')) { + isImmediate = true; + immValue = strtol(immStr.c_str(), nullptr, 0); + } + } + + // Determine mod field + i64_byte_t mod = 0; + if (!hasDisp && displacement == 0) { + mod = isRbp ? 0x01 : 0x00; + } else if (displacement >= -128 && displacement <= 127) { + mod = 0x01; + } else { + mod = 0x02; + } + + if (destIsMemory) { + if (foundReg) { + // cmp [reg+n], reg + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x39); // CMP r/m64, r64 + + i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + } else if (isImmediate) { + // cmp qword [reg+n], imm32 + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x81); // CMP r/m64, imm32 + + // reg field = 7 for CMP + i64_byte_t modrm = (mod << 6) | (7 << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + } + } else { + // cmp reg, [reg+n] + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x3B); // CMP r64, r/m64 + + i64_byte_t modrm = (mod << 6) | (regCode << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + } + + // Write displacement + if (mod == 0x01) 
{ + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + } else if (mod == 0x02) { + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); + } else if (isRbp) { + kAppBytes.emplace_back(0x00); + } + + // Write immediate + if (destIsMemory && isImmediate) { + kAppBytes.emplace_back(static_cast(immValue & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); + } + + break; + } + + // Handle register-to-register and register-to-immediate + i64_byte_t reg1Code = 0; + i64_byte_t reg2Code = 0; + bool foundReg1 = false; + bool foundReg2 = false; + bool isImmediate = false; + int64_t immValue = 0; + + auto commaPos = substr.find(','); + std::string leftOperand = substr.substr(0, commaPos); + std::string rightOperand = substr.substr(commaPos + 1); + + while (!leftOperand.empty() && (leftOperand[0] == ' ' || leftOperand[0] == '\t')) { + leftOperand.erase(0, 1); + } + while (!rightOperand.empty() && (rightOperand[0] == ' ' || rightOperand[0] == '\t')) { + rightOperand.erase(0, 1); + } + + for (auto& reg : regs64) { + if (leftOperand.find(reg.name) != std::string::npos) { + reg1Code = reg.code; + foundReg1 = true; + break; + } + } + + for (auto& reg : regs64) { + if (rightOperand.find(reg.name) != std::string::npos) { + reg2Code = reg.code; + foundReg2 = true; + break; + } + } + + if (!foundReg2) { + if (!rightOperand.empty() && (isdigit(rightOperand[0]) || rightOperand[0] == '-')) { + isImmediate = true; + immValue = strtol(rightOperand.c_str(), nullptr, 0); + } + } + + if (foundReg1 && foundReg2) { + // cmp reg1, reg2 + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x39); // CMP r/m64, r64 + + i64_byte_t 
modrm = (0x3 << 6) | (reg2Code << 3) | reg1Code; + kAppBytes.emplace_back(modrm); + } else if (foundReg1 && isImmediate) { + // cmp reg, imm + kAppBytes.emplace_back(0x48); // REX.W + kAppBytes.emplace_back(0x81); // CMP r/m64, imm32 + + // reg field = 7 for CMP + i64_byte_t modrm = (0x3 << 6) | (7 << 3) | reg1Code; + kAppBytes.emplace_back(modrm); + + kAppBytes.emplace_back(static_cast(immValue & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((immValue >> 24) & 0xFF)); + } else { + CompilerKit::Detail::print_error("Invalid operands for cmp instruction.", file); + throw std::runtime_error("invalid_cmp_operands"); + } + + break; + } + + /// LEA instruction handler. + if (name == "lea") { + std::string substr = line.substr(line.find(name) + name.size()); + + // Remove leading whitespace + while (!substr.empty() && (substr[0] == ' ' || substr[0] == '\t')) { + substr.erase(0, 1); + } + + if (substr.find(",") == std::string::npos || substr.find('[') == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: lea requires reg, [mem] format.", file); + throw std::runtime_error("syntax_err"); + } + + // Register lookup table + struct RegInfo { + const char* name; + i64_byte_t code; + }; + + RegInfo regs64[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, + {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; + + auto commaPos = substr.find(','); + std::string destOperand = substr.substr(0, commaPos); + std::string srcOperand = substr.substr(commaPos + 1); + + // Remove whitespace + while (!destOperand.empty() && (destOperand[0] == ' ' || destOperand[0] == '\t')) { + destOperand.erase(0, 1); + } + while (!srcOperand.empty() && (srcOperand[0] == ' ' || srcOperand[0] == '\t')) { + srcOperand.erase(0, 1); + } + + // Find destination register + i64_byte_t destReg = 0; + bool foundDest = false; + + for (auto& reg : regs64) { + if 
(destOperand.find(reg.name) != std::string::npos) { + destReg = reg.code; + foundDest = true; + break; + } + } + + if (!foundDest) { + CompilerKit::Detail::print_error("Invalid destination register for lea.", file); + throw std::runtime_error("invalid_dest_reg"); + } + + // Parse memory operand [base+disp] or [base-disp] + auto bracketStart = srcOperand.find('['); + auto bracketEnd = srcOperand.find(']'); + + if (bracketStart == std::string::npos || bracketEnd == std::string::npos) { + CompilerKit::Detail::print_error("Syntax error: malformed memory operand for lea.", + file); + throw std::runtime_error("syntax_err"); + } + + std::string memOperand = + srcOperand.substr(bracketStart + 1, bracketEnd - bracketStart - 1); + + // Find base register + i64_byte_t baseReg = 0; + bool foundBase = false; + + for (auto& reg : regs64) { + if (memOperand.find(reg.name) != std::string::npos) { + baseReg = reg.code; + foundBase = true; + break; + } + } + + if (!foundBase) { + CompilerKit::Detail::print_error("Invalid base register in memory operand for lea.", + file); + throw std::runtime_error("invalid_base_reg"); + } + + bool isRbp = (baseReg == 5); + bool isRsp = (baseReg == 4); + int32_t displacement = 0; + bool hasDisp = false; + + // Look for +/- displacement + auto plusPos = memOperand.find('+'); + auto minusPos = memOperand.find('-'); + + if (plusPos != std::string::npos) { + std::string dispStr = memOperand.substr(plusPos + 1); + displacement = static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } else if (minusPos != std::string::npos) { + std::string dispStr = memOperand.substr(minusPos + 1); + displacement = -static_cast(strtol(dispStr.c_str(), nullptr, 0)); + hasDisp = true; + } + + // Determine mod field + i64_byte_t mod = 0x00; + if (hasDisp || isRbp) { + if (displacement >= -128 && displacement <= 127) { + mod = 0x01; // 8-bit displacement + } else { + mod = 0x02; // 32-bit displacement + } + } + + // Emit REX.W prefix for 64-bit + 
kAppBytes.emplace_back(0x48); + + // Emit LEA opcode + kAppBytes.emplace_back(0x8D); + + // Emit ModR/M byte + i64_byte_t modrm = (mod << 6) | (destReg << 3) | baseReg; + kAppBytes.emplace_back(modrm); + + // RSP needs SIB byte + if (isRsp) { + kAppBytes.emplace_back(0x24); + } + + // Emit displacement + if (mod == 0x01) { + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + } else if (mod == 0x02) { + kAppBytes.emplace_back(static_cast(displacement & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 8) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 16) & 0xFF)); + kAppBytes.emplace_back(static_cast((displacement >> 24) & 0xFF)); + } else if (isRbp) { + // RBP with no displacement needs [rbp+0] + kAppBytes.emplace_back(0x00); + } + + break; + } + + /// Push instruction handler. + if (name == "push" || name == "pop") { + std::string substr = line.substr(line.find(name) + name.size()); + + // Remove leading whitespace + while (!substr.empty() && (substr[0] == ' ' || substr[0] == '\t')) { + substr.erase(0, 1); + } + + i64_byte_t baseOpcode = (name == "push") ? 
kAsmPushOpcode : kAsmPopOpcode; + bool found = false; + + // Check for extended registers r8-r15 + if (substr.size() >= 2 && substr[0] == 'r' && isdigit(substr[1])) { + int regNum = 0; + + if (substr.size() >= 3 && isdigit(substr[2])) { + regNum = (substr[1] - '0') * 10 + (substr[2] - '0'); + } else { + regNum = substr[1] - '0'; + } + + if (regNum >= 8 && regNum <= 15) { + // REX.B prefix for r8-r15 + kAppBytes.emplace_back(0x41); + kAppBytes.emplace_back(baseOpcode + (regNum - 8)); + found = true; + } + } + + // Check for standard 64-bit registers rax-rdi + if (!found) { + struct RegPushPop { + const char* name; + i64_byte_t offset; + }; + + RegPushPop regs[] = {{"rax", 0}, {"rcx", 1}, {"rdx", 2}, {"rbx", 3}, + {"rsp", 4}, {"rbp", 5}, {"rsi", 6}, {"rdi", 7}}; + + for (auto& reg : regs) { + if (substr.find(reg.name) != std::string::npos) { + kAppBytes.emplace_back(baseOpcode + reg.offset); + found = true; + break; + } + } + } + + if (!found) { + if (isnumber(substr[0])) { + kAppBytes.emplace_back(name == "push" ? 
0x68 : 0x8F); + + // push imm always takes a 32-bit immediate (sign-extended in 64-bit mode) + // Parse the immediate value without adding kOrigin + long imm = 0; + if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'x') { + imm = strtol(substr.c_str() + 2, nullptr, 16); + } else if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'b') { + imm = strtol(substr.c_str() + 2, nullptr, 2); + } else if (substr.size() > 2 && substr[0] == '0' && substr[1] == 'o') { + imm = strtol(substr.c_str() + 2, nullptr, 8); + } else { + imm = strtol(substr.c_str(), nullptr, 10); + } + + CompilerKit::NumberCast32 num(imm); + if (kRegisterBitWidth == 64 || kRegisterBitWidth == 32) { + kAppBytes.emplace_back(num.number[0]); + kAppBytes.emplace_back(num.number[1]); + kAppBytes.emplace_back(num.number[2]); + kAppBytes.emplace_back(num.number[3]); + } else if (kRegisterBitWidth == 16) { + kAppBytes.emplace_back(num.number[0]); + kAppBytes.emplace_back(num.number[1]); + } + + break; + } + + CompilerKit::Detail::print_error("Invalid operand for " + name + ": " + substr, + "CompilerKit"); + throw std::runtime_error("invalid_push_pop_operand"); + } + + break; + } + } + + if (name == "int" || name == "into" || name == "intd") { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + this->WriteNumber8(line.find(name) + name.size() + 1, line); + + break; + } else if (name == "jmp" || name == "call") { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + + if (auto it = std::find(kUndefinedSymbols.begin(), kUndefinedSymbols.end(), name); + it != kUndefinedSymbols.end()) { + auto number_str = std::to_string(kOrigin + name.size()); + this->WriteNumber(0, number_str); + kOrigin += name.size(); + } + + if (kRegisterBitWidth == 64) { + this->WriteNumber(line.find(name) + name.size() + 1, line); + } else { + this->WriteNumber32(line.find(name) + name.size() + 1, line); + } + + break; + } + + if (name == "syscall") { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + kAppBytes.emplace_back(0x05); + 
break; + } else { + kAppBytes.emplace_back(opcodeAMD64.fOpcode); + + break; + } + } + } + + if (line[0] == kAssemblerPragmaSym) { + if (foundInstruction) { + CompilerKit::Detail::print_error("Syntax error: " + line, file); + throw std::runtime_error("syntax_err"); + } + + if (line.find("bits 64") != std::string::npos) { + kRegisterBitWidth = 64U; + } else if (line.find("bits 32") != std::string::npos) { + kRegisterBitWidth = 32U; + } else if (line.find("bits 16") != std::string::npos) { + kRegisterBitWidth = 16U; + } + + if (auto org_pos = line.find("org"); org_pos != std::string::npos) { + auto value_pos = org_pos + strlen("org") + 1; + + if (value_pos >= line.size()) { + CompilerKit::Detail::print_error("Invalid org directive", "CompilerKit"); + throw std::runtime_error("invalid_org"); + } + + size_t base[] = {10, 16, 2, 8}; + + for (size_t i = 0; i < 4; i++) { + if (kOrigin = strtol(line.substr(value_pos).c_str(), nullptr, base[i]); kOrigin) { + if (errno != 0) { + continue; + } else { + if (kVerbose) { + kStdOut << "AssemblerAMD64: Origin Set: " << kOrigin << std::endl; + } + + break; + } + } + } + } + } + /// write a dword + else if (auto pos = line.find(".dword"); pos != std::string::npos) { + this->WriteNumber32(pos + strlen(".dword") + 1, line); + } + /// write a long + else if (auto pos = line.find(".long"); pos != std::string::npos) { + this->WriteNumber(pos + strlen(".long") + 1, line); + } + /// write a 16-bit number + else if (auto pos = line.find(".word"); pos != std::string::npos) { + this->WriteNumber16(pos + strlen(".word") + 1, line); + } + + kOrigin += kIPAlignement; + + return true; +} + +// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+ARM64.cc b/src/CompilerKit/src/Assemblers/Assembler+ARM64.cc deleted file mode 100644 index bf567b7..0000000 --- a/src/CompilerKit/src/Assemblers/Assembler+ARM64.cc +++ /dev/null @@ -1,584 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the 
Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @file Assembler+ARM64.cc -/// @author Amlal El Mahrouss -/// @brief 'ACORN' Assembler. - -/// REMINDER: when dealing with an undefined symbol use (string -/// size):LinkerFindSymbol:(string) so that li will look for it. - -///////////////////////////////////////////////////////////////////////////////////////// - -#ifndef __ASM_NEED_ARM64__ -#define __ASM_NEED_ARM64__ -#endif - -#include -#include -#include -#include -#include -#include -#include - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" -#define kYellow "\e[0;33m" - -constexpr auto kArm64Alignment = 0x1U; - -static Char kOutputArch = CompilerKit::kPefArchARM64; - -static std::size_t kCounter = 1UL; - -static std::uintptr_t kOrigin = kPefBaseOrigin; -static std::vector> kOriginLabel; - -static std::vector kBytes; - -static CompilerKit::AERecordHeader kCurrentRecord{ - .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; - -static std::vector kRecords; -static std::vector kUndefinedSymbols; - -static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; -static const std::string kRelocSymbol = ":RuntimeSymbol:"; - -// \brief forward decl. -static bool asm_read_attributes(std::string line); - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @brief POWER assembler entrypoint, the program/module starts here. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(AssemblerMainARM64) { - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - for (size_t i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { - kStdOut << "ACORN: AARCH64 Assembler Driver.\nACORN: " << kDistVersion - << "\nACORN: " - "Copyright (c) " - "Amlal El Mahrouss\n"; - return 0; - } else if (strcmp(argv[i], "-help") == 0) { - kStdOut << "ACORN: AARCH64 Assembler Driver.\nACORN: Copyright (c) 2024 " - "Amlal El Mahrouss\n"; - kStdOut << "-version,--v: print program version.\n"; - kStdOut << "-fverbose: print verbose output.\n"; - kStdOut << "-fbinary: output as flat binary.\n"; - - return 0; - } else if (strcmp(argv[i], "-fbinary") == 0) { - kOutputAsBinary = true; - continue; - } else if (strcmp(argv[i], "-fverbose") == 0) { - kVerbose = true; - continue; - } - - kStdOut << "ACORN: ignore " << argv[i] << "\n"; - continue; - } - - if (!std::filesystem::exists(argv[i])) { - kStdOut << "ACORN: can't open: " << argv[i] << std::endl; - goto asm_fail_exit; - } - - std::string object_output(argv[i]); - - for (auto& ext : kAsmFileExts) { - if (object_output.find(ext) != std::string::npos) { - object_output.erase(object_output.find(ext), std::strlen(ext)); - } - } - - object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; - - std::ifstream file_ptr(argv[i]); - std::ofstream file_ptr_out(object_output, std::ofstream::binary); - - if (file_ptr_out.bad()) { - if (kVerbose) { - kStdOut << "ACORN: error: " << strerror(errno) << "\n"; - } - } - - std::string line; - - CompilerKit::AEHeader hdr{0}; - - memset(hdr.fPad, kAENullType, kAEPad); - - hdr.fMagic[0] = kAEMag0; - hdr.fMagic[1] = kAEMag1; - hdr.fMagic[2] = kAEMag2; - hdr.fSize = sizeof(CompilerKit::AEHeader); - hdr.fArch = kOutputArch; - - ///////////////////////////////////////////////////////////////////////////////////////// - - // COMPILATION LOOP - - ///////////////////////////////////////////////////////////////////////////////////////// - - CompilerKit::EncoderARM64 asm64; - - while (std::getline(file_ptr, line)) { - if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { - CompilerKit::Detail::print_error(ln, argv[i]); - continue; - } - - try { - asm_read_attributes(line); - asm64.WriteLine(line, argv[i]); - } catch (const std::exception& e) { - if (kVerbose) { - std::string what = e.what(); - CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); - } - - std::filesystem::remove(object_output); - goto asm_fail_exit; - } - } - - if (!kOutputAsBinary) { - if (kVerbose) { - kStdOut << "AssemblerARM64: Writing object file...\n"; - } - - // this is the final step, write everything to the file. 
- - auto pos = file_ptr_out.tellp(); - - hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); - - file_ptr_out << hdr; - - if (kRecords.empty()) { - kStdErr << "AssemblerARM64: At least one record is needed to write an object " - "file.\nAssemblerARM64: Make one using `public_segment .code64 foo_bar`.\n"; - - std::filesystem::remove(object_output); - return 1; - } - - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - std::size_t record_count = 0UL; - - for (auto& record_hdr : kRecords) { - record_hdr.fFlags |= CompilerKit::kKindRelocationAtRuntime; - record_hdr.fOffset = record_count; - ++record_count; - - file_ptr_out << record_hdr; - - if (kVerbose) kStdOut << "AssemblerARM64: Wrote record " << record_hdr.fName << "...\n"; - } - - // increment once again, so that we won't lie about the kUndefinedSymbols. - ++record_count; - - for (auto& sym : kUndefinedSymbols) { - CompilerKit::AERecordHeader undefined_sym{0}; - - if (kVerbose) kStdOut << "AssemblerARM64: Wrote symbol " << sym << " to file...\n"; - - undefined_sym.fKind = CompilerKit::kKindRelocationAtRuntime; - undefined_sym.fSize = sym.size(); - undefined_sym.fOffset = record_count; - - ++record_count; - - memset(undefined_sym.fPad, kAENullType, kAEPad); - memcpy(undefined_sym.fName, sym.c_str(), sym.size()); - - file_ptr_out << undefined_sym; - - ++kCounter; - } - - auto pos_end = file_ptr_out.tellp(); - - file_ptr_out.seekp(pos); - - hdr.fStartCode = pos_end; - hdr.fCodeSize = kBytes.size(); - - file_ptr_out << hdr; - - file_ptr_out.seekp(pos_end); - } else { - if (kVerbose) { - kStdOut << "AssemblerARM64: Write raw binary...\n"; - } - } - - // byte from byte, we write this. 
- for (auto& byte : kBytes) { - file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); - } - - if (kVerbose) kStdOut << "AssemblerARM64: Wrote file with program in it.\n"; - - file_ptr_out.flush(); - file_ptr_out.close(); - - if (kVerbose) kStdOut << "AssemblerARM64: Exit succeeded.\n"; - - return 0; - } - -asm_fail_exit: - - if (kVerbose) kStdOut << "AssemblerARM64: Exit failed.\n"; - - return NECTAR_EXEC_ERROR; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for attributes -// returns true if any was found. - -///////////////////////////////////////////////////////////////////////////////////////// - -static bool asm_read_attributes(std::string line) { - // extern_segment is the opposite of public_segment, it signals to the li - // that we need this symbol. - if (CompilerKit::ast_find_needle(line, "extern_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_extern_segment_bin"); - } - - auto name = line.substr(line.find("extern_segment") + strlen("extern_segment") + 1); - - if (name.size() == 0) { - CompilerKit::Detail::print_error("Invalid extern_segment", "CompilerKit"); - throw std::runtime_error("invalid_extern_segment"); - } - - std::string result = std::to_string(name.size()); - result += kUndefinedSymbol; - - // mangle this - for (char& j : name) { - if (j == ' ' || j == ',') j = '$'; - } - - result += name; - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. - kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. 
- kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that li can find it. - - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, result.c_str(), result.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - // public_segment is a special keyword used by Assembler to tell the AE output stage to - // mark this section as a header. it currently supports .code64, .data64., - // .zero64 - else if (CompilerKit::ast_find_needle(line, "public_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_public_segment_bin"); - } - - auto name = line.substr(line.find("public_segment") + strlen("public_segment")); - - std::string name_copy = name; - - for (char& j : name) { - if (j == ' ') j = '$'; - } - - if (name.find(".code64") != std::string::npos) { - // data is treated as code. - - name_copy.erase(name_copy.find(".code64"), strlen(".code64")); - kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(".data64") != std::string::npos) { - // no code will be executed from here. - - name_copy.erase(name_copy.find(".data64"), strlen(".data64")); - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(".zero64") != std::string::npos) { - // this is a bss section. - - name_copy.erase(name_copy.find(".zero64"), strlen(".zero64")); - kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that li can find it. 
- - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); - - kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); - ++kOrigin; - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, name.c_str(), name.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - return false; -} - -// \brief algorithms and helpers. - -namespace CompilerKit::Detail::algorithm { -// \brief authorize a brief set of characters. -static inline bool is_not_alnum_space(char c) { - return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || - (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || - (c == '_') || (c == ':') || (c == '@') || (c == '.')); -} - -bool is_valid_arm64(std::string str) { - return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); -} -} // namespace CompilerKit::Detail::algorithm - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for line (syntax check) - -///////////////////////////////////////////////////////////////////////////////////////// - -std::string CompilerKit::EncoderARM64::CheckLine(std::string line, std::string file) { - std::string err_str; - - if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || - CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || - CompilerKit::ast_find_needle(line, ";")) { - if (line.find('#') != std::string::npos) { - line.erase(line.find('#')); - } else if (line.find(';') != std::string::npos) { - line.erase(line.find(';')); - } else { - /// 
does the line contains valid input? - if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - } - } - - return err_str; - } - - if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - - return err_str; - } - - // check for a valid instruction format. - - if (line.find(',') != std::string::npos) { - if (line.find(',') + 1 == line.size()) { - err_str += "\nInstruction lacks right register, here -> "; - err_str += line.substr(line.find(',')); - - return err_str; - } else { - bool nothing_on_right = true; - - if (line.find(',') + 1 > line.size()) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - - auto substr = line.substr(line.find(',') + 1); - - for (auto& ch : substr) { - if (ch != ' ' && ch != '\t') { - nothing_on_right = false; - } - } - - // this means we found nothing after that ',' . 
- if (nothing_on_right) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - } - } - - return err_str; -} - -bool CompilerKit::EncoderARM64::WriteNumber(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_hex"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerARM64: found a base 16 number here: " << jump_label.substr(pos) - << "\n"; - } - - return true; - } - case 'b': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_bin"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); - - if (kVerbose) { - kStdOut << "AssemblerARM64: found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - case 'o': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_octal"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); - - if (kVerbose) { - kStdOut << "AssemblerARM64: found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - default: { - 
break; - } - } - - /* check for errno and stuff like that */ - if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { - if (errno != 0) { - return false; - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "AssemblerARM64: found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @brief Read and write an instruction to the output array. - -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerKit::EncoderARM64::WriteLine(std::string line, std::string file) { - if (CompilerKit::ast_find_needle(line, "public_segment")) return false; - - if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) return false; - - return true; -} - -// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+ARM64.cpp b/src/CompilerKit/src/Assemblers/Assembler+ARM64.cpp new file mode 100644 index 0000000..bf567b7 --- /dev/null +++ b/src/CompilerKit/src/Assemblers/Assembler+ARM64.cpp @@ -0,0 +1,584 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @file Assembler+ARM64.cc +/// @author Amlal El Mahrouss +/// @brief 'ACORN' Assembler. + +/// REMINDER: when dealing with an undefined symbol use (string +/// size):LinkerFindSymbol:(string) so that li will look for it. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +#ifndef __ASM_NEED_ARM64__ +#define __ASM_NEED_ARM64__ +#endif + +#include +#include +#include +#include +#include +#include +#include + +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +#define kBlank "\e[0;30m" +#define kRed "\e[0;31m" +#define kWhite "\e[0;97m" +#define kYellow "\e[0;33m" + +constexpr auto kArm64Alignment = 0x1U; + +static Char kOutputArch = CompilerKit::kPefArchARM64; + +static std::size_t kCounter = 1UL; + +static std::uintptr_t kOrigin = kPefBaseOrigin; +static std::vector> kOriginLabel; + +static std::vector kBytes; + +static CompilerKit::AERecordHeader kCurrentRecord{ + .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; + +static std::vector kRecords; +static std::vector kUndefinedSymbols; + +static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; +static const std::string kRelocSymbol = ":RuntimeSymbol:"; + +// \brief forward decl. +static bool asm_read_attributes(std::string line); + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @brief POWER assembler entrypoint, the program/module starts here. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(AssemblerMainARM64) { + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + for (size_t i = 1; i < argc; ++i) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { + kStdOut << "ACORN: AARCH64 Assembler Driver.\nACORN: " << kDistVersion + << "\nACORN: " + "Copyright (c) " + "Amlal El Mahrouss\n"; + return 0; + } else if (strcmp(argv[i], "-help") == 0) { + kStdOut << "ACORN: AARCH64 Assembler Driver.\nACORN: Copyright (c) 2024 " + "Amlal El Mahrouss\n"; + kStdOut << "-version,--v: print program version.\n"; + kStdOut << "-fverbose: print verbose output.\n"; + kStdOut << "-fbinary: output as flat binary.\n"; + + return 0; + } else if (strcmp(argv[i], "-fbinary") == 0) { + kOutputAsBinary = true; + continue; + } else if (strcmp(argv[i], "-fverbose") == 0) { + kVerbose = true; + continue; + } + + kStdOut << "ACORN: ignore " << argv[i] << "\n"; + continue; + } + + if (!std::filesystem::exists(argv[i])) { + kStdOut << "ACORN: can't open: " << argv[i] << std::endl; + goto asm_fail_exit; + } + + std::string object_output(argv[i]); + + for (auto& ext : kAsmFileExts) { + if (object_output.find(ext) != std::string::npos) { + object_output.erase(object_output.find(ext), std::strlen(ext)); + } + } + + object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; + + std::ifstream file_ptr(argv[i]); + std::ofstream file_ptr_out(object_output, std::ofstream::binary); + + if (file_ptr_out.bad()) { + if (kVerbose) { + kStdOut << "ACORN: error: " << strerror(errno) << "\n"; + } + } + + std::string line; + + CompilerKit::AEHeader hdr{0}; + + memset(hdr.fPad, kAENullType, kAEPad); + + hdr.fMagic[0] = kAEMag0; + hdr.fMagic[1] = kAEMag1; + hdr.fMagic[2] = kAEMag2; + hdr.fSize = sizeof(CompilerKit::AEHeader); + hdr.fArch = kOutputArch; + + ///////////////////////////////////////////////////////////////////////////////////////// + + // COMPILATION LOOP + + ///////////////////////////////////////////////////////////////////////////////////////// + + CompilerKit::EncoderARM64 asm64; + + while (std::getline(file_ptr, line)) { + if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { + CompilerKit::Detail::print_error(ln, argv[i]); + continue; + } + + try { + asm_read_attributes(line); + asm64.WriteLine(line, argv[i]); + } catch (const std::exception& e) { + if (kVerbose) { + std::string what = e.what(); + CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); + } + + std::filesystem::remove(object_output); + goto asm_fail_exit; + } + } + + if (!kOutputAsBinary) { + if (kVerbose) { + kStdOut << "AssemblerARM64: Writing object file...\n"; + } + + // this is the final step, write everything to the file. 
+ + auto pos = file_ptr_out.tellp(); + + hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); + + file_ptr_out << hdr; + + if (kRecords.empty()) { + kStdErr << "AssemblerARM64: At least one record is needed to write an object " + "file.\nAssemblerARM64: Make one using `public_segment .code64 foo_bar`.\n"; + + std::filesystem::remove(object_output); + return 1; + } + + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + std::size_t record_count = 0UL; + + for (auto& record_hdr : kRecords) { + record_hdr.fFlags |= CompilerKit::kKindRelocationAtRuntime; + record_hdr.fOffset = record_count; + ++record_count; + + file_ptr_out << record_hdr; + + if (kVerbose) kStdOut << "AssemblerARM64: Wrote record " << record_hdr.fName << "...\n"; + } + + // increment once again, so that we won't lie about the kUndefinedSymbols. + ++record_count; + + for (auto& sym : kUndefinedSymbols) { + CompilerKit::AERecordHeader undefined_sym{0}; + + if (kVerbose) kStdOut << "AssemblerARM64: Wrote symbol " << sym << " to file...\n"; + + undefined_sym.fKind = CompilerKit::kKindRelocationAtRuntime; + undefined_sym.fSize = sym.size(); + undefined_sym.fOffset = record_count; + + ++record_count; + + memset(undefined_sym.fPad, kAENullType, kAEPad); + memcpy(undefined_sym.fName, sym.c_str(), sym.size()); + + file_ptr_out << undefined_sym; + + ++kCounter; + } + + auto pos_end = file_ptr_out.tellp(); + + file_ptr_out.seekp(pos); + + hdr.fStartCode = pos_end; + hdr.fCodeSize = kBytes.size(); + + file_ptr_out << hdr; + + file_ptr_out.seekp(pos_end); + } else { + if (kVerbose) { + kStdOut << "AssemblerARM64: Write raw binary...\n"; + } + } + + // byte from byte, we write this. 
+ for (auto& byte : kBytes) { + file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); + } + + if (kVerbose) kStdOut << "AssemblerARM64: Wrote file with program in it.\n"; + + file_ptr_out.flush(); + file_ptr_out.close(); + + if (kVerbose) kStdOut << "AssemblerARM64: Exit succeeded.\n"; + + return 0; + } + +asm_fail_exit: + + if (kVerbose) kStdOut << "AssemblerARM64: Exit failed.\n"; + + return NECTAR_EXEC_ERROR; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for attributes +// returns true if any was found. + +///////////////////////////////////////////////////////////////////////////////////////// + +static bool asm_read_attributes(std::string line) { + // extern_segment is the opposite of public_segment, it signals to the li + // that we need this symbol. + if (CompilerKit::ast_find_needle(line, "extern_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_extern_segment_bin"); + } + + auto name = line.substr(line.find("extern_segment") + strlen("extern_segment") + 1); + + if (name.size() == 0) { + CompilerKit::Detail::print_error("Invalid extern_segment", "CompilerKit"); + throw std::runtime_error("invalid_extern_segment"); + } + + std::string result = std::to_string(name.size()); + result += kUndefinedSymbol; + + // mangle this + for (char& j : name) { + if (j == ' ' || j == ',') j = '$'; + } + + result += name; + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. + kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. 
+ kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that li can find it. + + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, result.c_str(), result.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + // public_segment is a special keyword used by Assembler to tell the AE output stage to + // mark this section as a header. it currently supports .code64, .data64., + // .zero64 + else if (CompilerKit::ast_find_needle(line, "public_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_public_segment_bin"); + } + + auto name = line.substr(line.find("public_segment") + strlen("public_segment")); + + std::string name_copy = name; + + for (char& j : name) { + if (j == ' ') j = '$'; + } + + if (name.find(".code64") != std::string::npos) { + // data is treated as code. + + name_copy.erase(name_copy.find(".code64"), strlen(".code64")); + kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(".data64") != std::string::npos) { + // no code will be executed from here. + + name_copy.erase(name_copy.find(".data64"), strlen(".data64")); + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(".zero64") != std::string::npos) { + // this is a bss section. + + name_copy.erase(name_copy.find(".zero64"), strlen(".zero64")); + kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that li can find it. 
+ + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); + + kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); + ++kOrigin; + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, name.c_str(), name.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + return false; +} + +// \brief algorithms and helpers. + +namespace CompilerKit::Detail::algorithm { +// \brief authorize a brief set of characters. +static inline bool is_not_alnum_space(char c) { + return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || + (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || + (c == '_') || (c == ':') || (c == '@') || (c == '.')); +} + +bool is_valid_arm64(std::string str) { + return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); +} +} // namespace CompilerKit::Detail::algorithm + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for line (syntax check) + +///////////////////////////////////////////////////////////////////////////////////////// + +std::string CompilerKit::EncoderARM64::CheckLine(std::string line, std::string file) { + std::string err_str; + + if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || + CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || + CompilerKit::ast_find_needle(line, ";")) { + if (line.find('#') != std::string::npos) { + line.erase(line.find('#')); + } else if (line.find(';') != std::string::npos) { + line.erase(line.find(';')); + } else { + /// 
does the line contains valid input? + if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + } + } + + return err_str; + } + + if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + + return err_str; + } + + // check for a valid instruction format. + + if (line.find(',') != std::string::npos) { + if (line.find(',') + 1 == line.size()) { + err_str += "\nInstruction lacks right register, here -> "; + err_str += line.substr(line.find(',')); + + return err_str; + } else { + bool nothing_on_right = true; + + if (line.find(',') + 1 > line.size()) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + + auto substr = line.substr(line.find(',') + 1); + + for (auto& ch : substr) { + if (ch != ' ' && ch != '\t') { + nothing_on_right = false; + } + } + + // this means we found nothing after that ',' . 
+ if (nothing_on_right) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + } + } + + return err_str; +} + +bool CompilerKit::EncoderARM64::WriteNumber(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_hex"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerARM64: found a base 16 number here: " << jump_label.substr(pos) + << "\n"; + } + + return true; + } + case 'b': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_bin"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); + + if (kVerbose) { + kStdOut << "AssemblerARM64: found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + case 'o': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_octal"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); + + if (kVerbose) { + kStdOut << "AssemblerARM64: found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + default: { + 
break; + } + } + + /* check for errno and stuff like that */ + if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { + if (errno != 0) { + return false; + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "AssemblerARM64: found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @brief Read and write an instruction to the output array. + +///////////////////////////////////////////////////////////////////////////////////////// + +bool CompilerKit::EncoderARM64::WriteLine(std::string line, std::string file) { + if (CompilerKit::ast_find_needle(line, "public_segment")) return false; + + if (!CompilerKit::Detail::algorithm::is_valid_arm64(line)) return false; + + return true; +} + +// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cc b/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cc deleted file mode 100644 index 05ca0d7..0000000 --- a/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cc +++ /dev/null @@ -1,904 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @file Assembler+PowerPC.cc -/// @author Amlal El Mahrouss -/// @brief POWER Assembler. - -/// REMINDER: when dealing with an undefined symbol use (string -/// size):LinkerFindSymbol:(string) so that li will look for it. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -#ifndef __ASM_NEED_PPC__ -#define __ASM_NEED_PPC__ -#endif - -#include -#include -#include -#include -#include -#include -#include - -///////////////////// - -// ANSI ESCAPE CODES - -///////////////////// - -#define kBlank "\e[0;30m" -#define kRed "\e[0;31m" -#define kWhite "\e[0;97m" -#define kYellow "\e[0;33m" - -constexpr auto kPowerIPAlignment = 0x1U; - -static Char kOutputArch = CompilerKit::kPefArchPowerPC; - -static std::size_t kCounter = 1UL; - -static std::uintptr_t kOrigin = kPefBaseOrigin; -static std::vector> kOriginLabel; - -static std::vector kBytes; - -static CompilerKit::AERecordHeader kCurrentRecord{ - .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; - -static std::vector kRecords; -static std::vector kUndefinedSymbols; - -static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; -static const std::string kRelocSymbol = ":RuntimeSymbol:"; - -// \brief forward decl. -static bool asm_read_attributes(std::string line); - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @brief POWER assembler entrypoint, the program/module starts here. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(AssemblerMainPower64) { - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - for (size_t i = 1; i < argc; ++i) { - if (argv[i][0] == '-') { - if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { - kStdOut << "POWER: POWER64 Assembler Driver.\nPOWER: " << kDistVersion - << "\nPOWER: " - "Copyright (c) " - "Amlal El Mahrouss\n"; - return 0; - } else if (strcmp(argv[i], "-h") == 0) { - kStdOut << "POWER: POWER64 Assembler Driver.\nPOWER: Copyright (c) 2024 " - "Amlal El Mahrouss\n"; - kStdOut << "-version,-v: print program version.\n"; - kStdOut << "-fverbose: print verbose output.\n"; - kStdOut << "-fbinary: output as flat binary.\n"; - - return 0; - } else if (strcmp(argv[i], "-fbinary") == 0) { - kOutputAsBinary = true; - continue; - } else if (strcmp(argv[i], "-fverbose") == 0) { - kVerbose = true; - continue; - } - - kStdOut << "POWER: ignore " << argv[i] << "\n"; - continue; - } - - if (!std::filesystem::exists(argv[i])) { - kStdOut << "POWER: can't open: " << argv[i] << std::endl; - goto asm_fail_exit; - } - - std::string object_output(argv[i]); - - for (auto& ext : kAsmFileExts) { - if (object_output.find(ext) != std::string::npos) { - object_output.erase(object_output.find(ext), std::strlen(ext)); - } - } - - object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; - - std::ifstream file_ptr(argv[i]); - std::ofstream file_ptr_out(object_output, std::ofstream::binary); - - if (file_ptr_out.bad()) { - if (kVerbose) { - kStdOut << "POWER: error: " << strerror(errno) << "\n"; - } - } - - std::string line; - - CompilerKit::AEHeader hdr{0}; - - memset(hdr.fPad, kAENullType, kAEPad); - - hdr.fMagic[0] = kAEMag0; - hdr.fMagic[1] = kAEMag1; - hdr.fMagic[2] = kAEMag2; - hdr.fSize = sizeof(CompilerKit::AEHeader); - hdr.fArch = kOutputArch; - - ///////////////////////////////////////////////////////////////////////////////////////// - - // COMPILATION LOOP - - ///////////////////////////////////////////////////////////////////////////////////////// - - CompilerKit::EncoderPowerPC asm64; - - while (std::getline(file_ptr, line)) { - if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { - CompilerKit::Detail::print_error(ln, argv[i]); - continue; - } - - try { - asm_read_attributes(line); - asm64.WriteLine(line, argv[i]); - } catch (const std::exception& e) { - if (kVerbose) { - std::string what = e.what(); - CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); - } - - std::filesystem::remove(object_output); - goto asm_fail_exit; - } - } - - if (!kOutputAsBinary) { - if (kVerbose) { - kStdOut << "POWER: Writing object file...\n"; - } - - // this is the final step, write everything to the file. 
- - auto pos = file_ptr_out.tellp(); - - hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); - - file_ptr_out << hdr; - - if (kRecords.empty()) { - kStdErr << "POWER: At least one record is needed to write an object " - "file.\nPOWER: Make one using `public_segment .code64 foo_bar`.\n"; - - std::filesystem::remove(object_output); - return 1; - } - - kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - std::size_t record_count = 0UL; - - for (auto& record_hdr : kRecords) { - record_hdr.fFlags |= CompilerKit::kKindRelocationAtRuntime; - record_hdr.fOffset = record_count; - ++record_count; - - file_ptr_out << record_hdr; - - if (kVerbose) kStdOut << "POWER: Wrote record " << record_hdr.fName << "...\n"; - } - - // increment once again, so that we won't lie about the kUndefinedSymbols. - ++record_count; - - for (auto& sym : kUndefinedSymbols) { - CompilerKit::AERecordHeader undefined_sym{0}; - - if (kVerbose) kStdOut << "POWER: Wrote symbol " << sym << " to file...\n"; - - undefined_sym.fKind = kAENullType; - undefined_sym.fSize = sym.size(); - undefined_sym.fOffset = record_count; - - ++record_count; - - memset(undefined_sym.fPad, kAENullType, kAEPad); - memcpy(undefined_sym.fName, sym.c_str(), sym.size()); - - file_ptr_out << undefined_sym; - - ++kCounter; - } - - auto pos_end = file_ptr_out.tellp(); - - file_ptr_out.seekp(pos); - - hdr.fStartCode = pos_end; - hdr.fCodeSize = kBytes.size(); - - file_ptr_out << hdr; - - file_ptr_out.seekp(pos_end); - } else { - if (kVerbose) { - kStdOut << "POWER: Write raw binary...\n"; - } - } - - // byte from byte, we write this. 
- for (auto& byte : kBytes) { - file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); - } - - if (kVerbose) kStdOut << "POWER: Wrote file with program in it.\n"; - - file_ptr_out.flush(); - file_ptr_out.close(); - - if (kVerbose) kStdOut << "POWER: Exit succeeded.\n"; - - return 0; - } - -asm_fail_exit: - - if (kVerbose) kStdOut << "POWER: Exit failed.\n"; - - return NECTAR_EXEC_ERROR; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for attributes -// returns true if any was found. - -///////////////////////////////////////////////////////////////////////////////////////// - -static bool asm_read_attributes(std::string line) { - // extern_segment is the opposite of public_segment, it signals to the li - // that we need this symbol. - if (CompilerKit::ast_find_needle(line, "extern_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_extern_segment_bin"); - } - - auto name = line.substr(line.find("extern_segment") + strlen("extern_segment") + 1); - - if (name.size() == 0) { - CompilerKit::Detail::print_error("Invalid extern_segment", "CompilerKit"); - throw std::runtime_error("invalid_extern_segment"); - } - - std::string result = std::to_string(name.size()); - result += kUndefinedSymbol; - - // mangle this - for (char& j : name) { - if (j == ' ' || j == ',') j = '$'; - } - - result += name; - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. - kCurrentRecord.fKind = CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind = CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. - kCurrentRecord.fKind = CompilerKit::kPefZero; - } - - // this is a special case for the start stub. 
- // we want this so that li can find it. - - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, result.c_str(), result.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - // public_segment is a special keyword used by POWER to tell the AE output stage to - // mark this section as a header. it currently supports .code64, .data64., - // .zero64 - else if (CompilerKit::ast_find_needle(line, "public_segment")) { - if (kOutputAsBinary) { - CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", - "CompilerKit"); - throw std::runtime_error("invalid_public_segment_bin"); - } - - auto name = line.substr(line.find("public_segment") + strlen("public_segment")); - - std::string name_copy = name; - - for (char& j : name) { - if (j == ' ') j = '$'; - } - - kCurrentRecord.fKind = CompilerKit::kKindExportSymbol; - - if (name.find(kPefCode64) != std::string::npos) { - // data is treated as code. - kCurrentRecord.fKind |= CompilerKit::kPefCode; - } else if (name.find(kPefData64) != std::string::npos) { - // no code will be executed from here. - kCurrentRecord.fKind |= CompilerKit::kPefData; - } else if (name.find(kPefZero64) != std::string::npos) { - // this is a bss section. - kCurrentRecord.fKind |= CompilerKit::kPefZero; - } - - // this is a special case for the start stub. - // we want this so that li can find it. 
- - if (name == kPefStart) { - kCurrentRecord.fKind = CompilerKit::kPefCode; - } - - while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); - - kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); - ++kOrigin; - - // now we can tell the code size of the previous kCurrentRecord. - - if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); - - memset(kCurrentRecord.fName, 0, kAESymbolLen); - memcpy(kCurrentRecord.fName, name.c_str(), name.size()); - - ++kCounter; - - memset(kCurrentRecord.fPad, kAENullType, kAEPad); - - kRecords.emplace_back(kCurrentRecord); - - return true; - } - - return false; -} - -// \brief algorithms and helpers. - -namespace CompilerKit::Detail::algorithm { -// \brief authorize a brief set of characters. -static inline bool is_not_alnum_space(char c) { - return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || - (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || - (c == '_') || (c == ':') || (c == '@') || (c == '.')); -} - -bool is_valid_power64(std::string str) { - return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); -} -} // namespace CompilerKit::Detail::algorithm - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Check for line (syntax check) - -///////////////////////////////////////////////////////////////////////////////////////// - -std::string CompilerKit::EncoderPowerPC::CheckLine(std::string line, std::string file) { - std::string err_str; - - if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || - CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || - CompilerKit::ast_find_needle(line, ";")) { - if (line.find('#') != std::string::npos) { - line.erase(line.find('#')); - } else if (line.find(';') != std::string::npos) { - line.erase(line.find(';')); - } else { - 
/// does the line contains valid input? - if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - } - } - - return err_str; - } - - if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) { - err_str = "Line contains non alphanumeric characters.\nhere -> "; - err_str += line; - - return err_str; - } - - // check for a valid instruction format. - - if (line.find(',') != std::string::npos) { - if (line.find(',') + 1 == line.size()) { - err_str += "\nInstruction lacks right register, here -> "; - err_str += line.substr(line.find(',')); - - return err_str; - } else { - bool nothing_on_right = true; - - if (line.find(',') + 1 > line.size()) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - - auto substr = line.substr(line.find(',') + 1); - - for (auto& ch : substr) { - if (ch != ' ' && ch != '\t') { - nothing_on_right = false; - } - } - - // this means we found nothing after that ',' . - if (nothing_on_right) { - err_str += "\nInstruction not complete, here -> "; - err_str += line; - - return err_str; - } - } - } - - // these do take an argument. - std::vector operands_inst = {"stw", "li"}; - - // these don't. - std::vector filter_inst = {"blr", "bl", "sc"}; - - for (auto& opcode_risc : kOpcodesPowerPC) { - if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { - for (auto& op : operands_inst) { - // if only the instruction was found. 
- if (line == op) { - err_str += "\nMalformed "; - err_str += op; - err_str += " instruction, here -> "; - err_str += line; - } - } - - // if it is like that -> addr1, 0x0 - if (auto it = std::find(filter_inst.begin(), filter_inst.end(), opcode_risc.name); - it == filter_inst.cend()) { - if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { - if (!isspace(line[line.find(opcode_risc.name) + strlen(opcode_risc.name)])) { - err_str += "\nMissing space between "; - err_str += opcode_risc.name; - err_str += " and operands.\nhere -> "; - err_str += line; - } - } - } - - return err_str; - } - } - - err_str += "Unrecognized instruction: " + line; - - return err_str; -} - -bool CompilerKit::EncoderPowerPC::WriteNumber(const std::size_t& pos, std::string& jump_label) { - if (!isdigit(jump_label[pos])) return false; - - switch (jump_label[pos + 1]) { - case 'x': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_hex"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "POWER: found a base 16 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; - } - case 'b': { - if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_bin"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); - - if (kVerbose) { - kStdOut << "POWER: found a base 2 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - case 'o': { - if (auto res = strtol(jump_label.substr(pos + 
2).c_str(), nullptr, 7); !res) { - if (errno != 0) { - CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); - throw std::runtime_error("invalid_octal"); - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); - - if (kVerbose) { - kStdOut << "POWER: found a base 8 number here: " << jump_label.substr(pos) << "\n"; - } - - for (char& i : num.number) { - kBytes.push_back(i); - } - - return true; - } - default: { - break; - } - } - - /* check for errno and stuff like that */ - if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { - if (errno != 0) { - return false; - } - } - - CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); - - for (char& i : num.number) { - kBytes.push_back(i); - } - - if (kVerbose) { - kStdOut << "POWER: found a base 10 number here: " << jump_label.substr(pos) << "\n"; - } - - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @brief Read and write an instruction to the output array. - -///////////////////////////////////////////////////////////////////////////////////////// - -bool CompilerKit::EncoderPowerPC::WriteLine(std::string line, std::string file) { - if (CompilerKit::ast_find_needle(line, "public_segment")) return false; - if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) return false; - - for (auto& opcode_risc : kOpcodesPowerPC) { - // strict check here - if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { - std::string name(opcode_risc.name); - std::string jump_label, cpy_jump_label; - std::vector found_registers_index; - - // check funct7 type. 
- switch (opcode_risc.ops->type) { - default: { - NumberCast32 num(opcode_risc.opcode); - - for (auto ch : num.number) { - kBytes.emplace_back(ch); - } - break; - } - case BADDR: - case PCREL: { - auto num = GetNumber32(line, name); - - kBytes.emplace_back(num.number[0]); - kBytes.emplace_back(num.number[1]); - kBytes.emplace_back(num.number[2]); - kBytes.emplace_back(0x48); - - break; - } - /// General purpose, float, vector operations. Everything that involve - /// registers. - case G0REG: - case FREG: - case VREG: - case GREG: { - // \brief how many registers we found. - std::size_t found_some_count = 0UL; - std::size_t register_count = 0UL; - std::string opcodeName = opcode_risc.name; - std::size_t register_sum = 0; - - NumberCast64 num(opcode_risc.opcode); - - for (size_t line_index = 0UL; line_index < line.size(); line_index++) { - if (line[line_index] == kAsmRegisterPrefix[0] && isdigit(line[line_index + 1])) { - std::string register_syntax = kAsmRegisterPrefix; - register_syntax += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) register_syntax += line[line_index + 2]; - - std::string reg_str; - reg_str += line[line_index + 1]; - - if (isdigit(line[line_index + 2])) reg_str += line[line_index + 2]; - - // it ranges from r0 to r19 - // something like r190 doesn't exist in the instruction set. 
- if (isdigit(line[line_index + 3]) && isdigit(line[line_index + 2])) { - reg_str += line[line_index + 3]; - CompilerKit::Detail::print_error( - "invalid register index, r" + reg_str + - "\nnote: The POWER accepts registers from r0 to r32.", - file); - throw std::runtime_error("invalid_register_index"); - } - - // finally cast to a size_t - std::size_t reg_index = strtol(reg_str.c_str(), nullptr, 10); - - if (reg_index > kAsmRegisterLimit) { - CompilerKit::Detail::print_error("invalid register index, r" + reg_str, file); - throw std::runtime_error("invalid_register_index"); - } - - if (opcodeName == "li") { - char numIndex = 0; - - for (size_t i = 0; i != reg_index; i++) { - numIndex += 0x20; - } - - auto num = GetNumber32(line, reg_str); - - kBytes.push_back(num.number[0]); - kBytes.push_back(num.number[1]); - kBytes.push_back(numIndex); - kBytes.push_back(0x38); - - // check if bigger than two. - for (size_t i = 2; i < 4; i++) { - if (num.number[i] > 0) { - CompilerKit::Detail::print_warning("number overflow on li operation.", file); - break; - } - } - - break; - } - - if ((opcodeName[0] == 's' && opcodeName[1] == 't')) { - if (register_sum == 0) { - for (size_t indexReg = 0UL; indexReg < reg_index; ++indexReg) { - register_sum += 0x20; - } - } else { - register_sum += reg_index; - } - } - - if (opcodeName == "mr") { - switch (register_count) { - case 0: { - kBytes.push_back(0x78); - - char numIndex = 0x3; - - for (size_t i = 0; i != reg_index; i++) { - numIndex += 0x8; - } - - kBytes.push_back(numIndex); - - break; - } - case 1: { - char numIndex = 0x1; - - for (size_t i = 0; i != reg_index; i++) { - numIndex += 0x20; - } - - for (size_t i = 0; i != reg_index; i++) { - kBytes[kBytes.size() - 1] += 0x8; - } - - kBytes[kBytes.size() - 1] -= 0x8; - - kBytes.push_back(numIndex); - - if (reg_index >= 10 && reg_index < 20) - kBytes.push_back(0x7d); - else if (reg_index >= 20 && reg_index < 30) - kBytes.push_back(0x7e); - else if (reg_index >= 30) - 
kBytes.push_back(0x7f); - else - kBytes.push_back(0x7c); - - break; - } - default: - break; - } - - ++register_count; - ++found_some_count; - } - - if (opcodeName == "addi") { - if (found_some_count == 2 || found_some_count == 0) - kBytes.emplace_back(reg_index); - else if (found_some_count == 1) - kBytes.emplace_back(0x00); - - ++found_some_count; - - if (found_some_count > 3) { - CompilerKit::Detail::print_error("Too much registers. -> " + line, file); - throw std::runtime_error("too_much_regs"); - } - } - - if (opcodeName.find("cmp") != std::string::npos) { - ++found_some_count; - - if (found_some_count > 3) { - CompilerKit::Detail::print_error("Too much registers. -> " + line, file); - throw std::runtime_error("too_much_regs"); - } - } - - if (opcodeName.find("mf") != std::string::npos || - opcodeName.find("mt") != std::string::npos) { - char numIndex = 0; - - for (size_t i = 0; i != reg_index; i++) { - numIndex += 0x20; - } - - num.number[2] += numIndex; - - ++found_some_count; - - if (found_some_count > 1) { - CompilerKit::Detail::print_error("Too much registers. 
-> " + line, file); - throw std::runtime_error("too_much_regs"); - } - - if (kVerbose) { - kStdOut << "POWER: Found register: " << register_syntax << "\n"; - kStdOut << "POWER: Amount of registers in instruction: " << found_some_count - << "\n"; - } - - if (reg_index >= 10 && reg_index < 20) - num.number[3] = 0x7d; - else if (reg_index >= 20 && reg_index < 30) - num.number[3] = 0x7e; - else if (reg_index >= 30) - num.number[3] = 0x7f; - else - num.number[3] = 0x7c; - - for (auto ch : num.number) { - kBytes.emplace_back(ch); - } - } - - found_registers_index.push_back(reg_index); - } - } - - if (opcodeName == "addi") { - kBytes.emplace_back(0x38); - } - - if (opcodeName.find("cmp") != std::string::npos) { - char rightReg = 0x0; - - for (size_t i = 0; i != found_registers_index[1]; i++) { - rightReg += 0x08; - } - - kBytes.emplace_back(0x00); - kBytes.emplace_back(rightReg); - kBytes.emplace_back(found_registers_index[0]); - kBytes.emplace_back(0x7c); - } - - if ((opcodeName[0] == 's' && opcodeName[1] == 't')) { - size_t offset = 0UL; - - if (line.find('+') != std::string::npos) { - auto number = GetNumber32(line.substr(line.find("+")), "+"); - offset = number.raw; - } - - kBytes.push_back(offset); - kBytes.push_back(0x00); - kBytes.push_back(register_sum); - - kBytes.emplace_back(0x90); - } - - if (opcodeName == "mr") { - if (register_count == 1) { - CompilerKit::Detail::print_error("Too few registers. -> " + line, file); - throw std::runtime_error("too_few_registers"); - } - } - - // we're not in immediate addressing, reg to reg. - if (opcode_risc.ops->type != GREG) { - // remember! register to register! 
- if (found_some_count == 1) { - CompilerKit::Detail::print_error( - "Unrecognized register found.\ntip: each POWER register " - "starts with 'r'.\nline: " + - line, - file); - - throw std::runtime_error("not_a_register"); - } - } - - if (found_some_count < 1 && name[0] != 'l' && name[0] != 's') { - CompilerKit::Detail::print_error( - "invalid combination of opcode and registers.\nline: " + line, file); - throw std::runtime_error("invalid_comb_op_reg"); - } - - break; - } - } - - kOrigin += kPowerIPAlignment; - break; - } - } - - return true; -} - -// Last rev 13-1-24 diff --git a/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cpp b/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cpp new file mode 100644 index 0000000..05ca0d7 --- /dev/null +++ b/src/CompilerKit/src/Assemblers/Assembler+PowerPC.cpp @@ -0,0 +1,904 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @file Assembler+PowerPC.cc +/// @author Amlal El Mahrouss +/// @brief POWER Assembler. + +/// REMINDER: when dealing with an undefined symbol use (string +/// size):LinkerFindSymbol:(string) so that li will look for it. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +#ifndef __ASM_NEED_PPC__ +#define __ASM_NEED_PPC__ +#endif + +#include +#include +#include +#include +#include +#include +#include + +///////////////////// + +// ANSI ESCAPE CODES + +///////////////////// + +#define kBlank "\e[0;30m" +#define kRed "\e[0;31m" +#define kWhite "\e[0;97m" +#define kYellow "\e[0;33m" + +constexpr auto kPowerIPAlignment = 0x1U; + +static Char kOutputArch = CompilerKit::kPefArchPowerPC; + +static std::size_t kCounter = 1UL; + +static std::uintptr_t kOrigin = kPefBaseOrigin; +static std::vector> kOriginLabel; + +static std::vector kBytes; + +static CompilerKit::AERecordHeader kCurrentRecord{ + .fName = "", .fKind = CompilerKit::kPefCode, .fSize = 0, .fOffset = 0}; + +static std::vector kRecords; +static std::vector kUndefinedSymbols; + +static const std::string kUndefinedSymbol = ":UndefinedSymbol:"; +static const std::string kRelocSymbol = ":RuntimeSymbol:"; + +// \brief forward decl. +static bool asm_read_attributes(std::string line); + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @brief POWER assembler entrypoint, the program/module starts here. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(AssemblerMainPower64) { + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + for (size_t i = 1; i < argc; ++i) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-v") == 0) { + kStdOut << "POWER: POWER64 Assembler Driver.\nPOWER: " << kDistVersion + << "\nPOWER: " + "Copyright (c) " + "Amlal El Mahrouss\n"; + return 0; + } else if (strcmp(argv[i], "-h") == 0) { + kStdOut << "POWER: POWER64 Assembler Driver.\nPOWER: Copyright (c) 2024 " + "Amlal El Mahrouss\n"; + kStdOut << "-version,-v: print program version.\n"; + kStdOut << "-fverbose: print verbose output.\n"; + kStdOut << "-fbinary: output as flat binary.\n"; + + return 0; + } else if (strcmp(argv[i], "-fbinary") == 0) { + kOutputAsBinary = true; + continue; + } else if (strcmp(argv[i], "-fverbose") == 0) { + kVerbose = true; + continue; + } + + kStdOut << "POWER: ignore " << argv[i] << "\n"; + continue; + } + + if (!std::filesystem::exists(argv[i])) { + kStdOut << "POWER: can't open: " << argv[i] << std::endl; + goto asm_fail_exit; + } + + std::string object_output(argv[i]); + + for (auto& ext : kAsmFileExts) { + if (object_output.find(ext) != std::string::npos) { + object_output.erase(object_output.find(ext), std::strlen(ext)); + } + } + + object_output += kOutputAsBinary ? 
kBinaryFileExt : kObjectFileExt; + + std::ifstream file_ptr(argv[i]); + std::ofstream file_ptr_out(object_output, std::ofstream::binary); + + if (file_ptr_out.bad()) { + if (kVerbose) { + kStdOut << "POWER: error: " << strerror(errno) << "\n"; + } + } + + std::string line; + + CompilerKit::AEHeader hdr{0}; + + memset(hdr.fPad, kAENullType, kAEPad); + + hdr.fMagic[0] = kAEMag0; + hdr.fMagic[1] = kAEMag1; + hdr.fMagic[2] = kAEMag2; + hdr.fSize = sizeof(CompilerKit::AEHeader); + hdr.fArch = kOutputArch; + + ///////////////////////////////////////////////////////////////////////////////////////// + + // COMPILATION LOOP + + ///////////////////////////////////////////////////////////////////////////////////////// + + CompilerKit::EncoderPowerPC asm64; + + while (std::getline(file_ptr, line)) { + if (auto ln = asm64.CheckLine(line, argv[i]); !ln.empty()) { + CompilerKit::Detail::print_error(ln, argv[i]); + continue; + } + + try { + asm_read_attributes(line); + asm64.WriteLine(line, argv[i]); + } catch (const std::exception& e) { + if (kVerbose) { + std::string what = e.what(); + CompilerKit::Detail::print_warning("exit because of: " + what, "CompilerKit"); + } + + std::filesystem::remove(object_output); + goto asm_fail_exit; + } + } + + if (!kOutputAsBinary) { + if (kVerbose) { + kStdOut << "POWER: Writing object file...\n"; + } + + // this is the final step, write everything to the file. 
+ + auto pos = file_ptr_out.tellp(); + + hdr.fCount = kRecords.size() + kUndefinedSymbols.size(); + + file_ptr_out << hdr; + + if (kRecords.empty()) { + kStdErr << "POWER: At least one record is needed to write an object " + "file.\nPOWER: Make one using `public_segment .code64 foo_bar`.\n"; + + std::filesystem::remove(object_output); + return 1; + } + + kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + std::size_t record_count = 0UL; + + for (auto& record_hdr : kRecords) { + record_hdr.fFlags |= CompilerKit::kKindRelocationAtRuntime; + record_hdr.fOffset = record_count; + ++record_count; + + file_ptr_out << record_hdr; + + if (kVerbose) kStdOut << "POWER: Wrote record " << record_hdr.fName << "...\n"; + } + + // increment once again, so that we won't lie about the kUndefinedSymbols. + ++record_count; + + for (auto& sym : kUndefinedSymbols) { + CompilerKit::AERecordHeader undefined_sym{0}; + + if (kVerbose) kStdOut << "POWER: Wrote symbol " << sym << " to file...\n"; + + undefined_sym.fKind = kAENullType; + undefined_sym.fSize = sym.size(); + undefined_sym.fOffset = record_count; + + ++record_count; + + memset(undefined_sym.fPad, kAENullType, kAEPad); + memcpy(undefined_sym.fName, sym.c_str(), sym.size()); + + file_ptr_out << undefined_sym; + + ++kCounter; + } + + auto pos_end = file_ptr_out.tellp(); + + file_ptr_out.seekp(pos); + + hdr.fStartCode = pos_end; + hdr.fCodeSize = kBytes.size(); + + file_ptr_out << hdr; + + file_ptr_out.seekp(pos_end); + } else { + if (kVerbose) { + kStdOut << "POWER: Write raw binary...\n"; + } + } + + // byte from byte, we write this. 
+ for (auto& byte : kBytes) { + file_ptr_out.write(reinterpret_cast(&byte), sizeof(byte)); + } + + if (kVerbose) kStdOut << "POWER: Wrote file with program in it.\n"; + + file_ptr_out.flush(); + file_ptr_out.close(); + + if (kVerbose) kStdOut << "POWER: Exit succeeded.\n"; + + return 0; + } + +asm_fail_exit: + + if (kVerbose) kStdOut << "POWER: Exit failed.\n"; + + return NECTAR_EXEC_ERROR; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for attributes +// returns true if any was found. + +///////////////////////////////////////////////////////////////////////////////////////// + +static bool asm_read_attributes(std::string line) { + // extern_segment is the opposite of public_segment, it signals to the li + // that we need this symbol. + if (CompilerKit::ast_find_needle(line, "extern_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid extern_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_extern_segment_bin"); + } + + auto name = line.substr(line.find("extern_segment") + strlen("extern_segment") + 1); + + if (name.size() == 0) { + CompilerKit::Detail::print_error("Invalid extern_segment", "CompilerKit"); + throw std::runtime_error("invalid_extern_segment"); + } + + std::string result = std::to_string(name.size()); + result += kUndefinedSymbol; + + // mangle this + for (char& j : name) { + if (j == ' ' || j == ',') j = '$'; + } + + result += name; + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. + kCurrentRecord.fKind = CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind = CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. + kCurrentRecord.fKind = CompilerKit::kPefZero; + } + + // this is a special case for the start stub. 
+ // we want this so that li can find it. + + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, result.c_str(), result.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + // public_segment is a special keyword used by POWER to tell the AE output stage to + // mark this section as a header. it currently supports .code64, .data64., + // .zero64 + else if (CompilerKit::ast_find_needle(line, "public_segment")) { + if (kOutputAsBinary) { + CompilerKit::Detail::print_error("Invalid public_segment directive in flat binary mode.", + "CompilerKit"); + throw std::runtime_error("invalid_public_segment_bin"); + } + + auto name = line.substr(line.find("public_segment") + strlen("public_segment")); + + std::string name_copy = name; + + for (char& j : name) { + if (j == ' ') j = '$'; + } + + kCurrentRecord.fKind = CompilerKit::kKindExportSymbol; + + if (name.find(kPefCode64) != std::string::npos) { + // data is treated as code. + kCurrentRecord.fKind |= CompilerKit::kPefCode; + } else if (name.find(kPefData64) != std::string::npos) { + // no code will be executed from here. + kCurrentRecord.fKind |= CompilerKit::kPefData; + } else if (name.find(kPefZero64) != std::string::npos) { + // this is a bss section. + kCurrentRecord.fKind |= CompilerKit::kPefZero; + } + + // this is a special case for the start stub. + // we want this so that li can find it. 
+ + if (name == kPefStart) { + kCurrentRecord.fKind = CompilerKit::kPefCode; + } + + while (name_copy.find(" ") != std::string::npos) name_copy.erase(name_copy.find(" "), 1); + + kOriginLabel.push_back(std::make_pair(name_copy, kOrigin)); + ++kOrigin; + + // now we can tell the code size of the previous kCurrentRecord. + + if (!kRecords.empty()) kRecords[kRecords.size() - 1].fSize = kBytes.size(); + + memset(kCurrentRecord.fName, 0, kAESymbolLen); + memcpy(kCurrentRecord.fName, name.c_str(), name.size()); + + ++kCounter; + + memset(kCurrentRecord.fPad, kAENullType, kAEPad); + + kRecords.emplace_back(kCurrentRecord); + + return true; + } + + return false; +} + +// \brief algorithms and helpers. + +namespace CompilerKit::Detail::algorithm { +// \brief authorize a brief set of characters. +static inline bool is_not_alnum_space(char c) { + return !(isalpha(c) || isdigit(c) || (c == ' ') || (c == '\t') || (c == ',') || (c == '(') || + (c == ')') || (c == '"') || (c == '\'') || (c == '[') || (c == ']') || (c == '+') || + (c == '_') || (c == ':') || (c == '@') || (c == '.')); +} + +bool is_valid_power64(std::string str) { + return std::find_if(str.begin(), str.end(), is_not_alnum_space) == str.end(); +} +} // namespace CompilerKit::Detail::algorithm + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Check for line (syntax check) + +///////////////////////////////////////////////////////////////////////////////////////// + +std::string CompilerKit::EncoderPowerPC::CheckLine(std::string line, std::string file) { + std::string err_str; + + if (line.empty() || CompilerKit::ast_find_needle(line, "extern_segment") || + CompilerKit::ast_find_needle(line, "public_segment") || line.find('#') != std::string::npos || + CompilerKit::ast_find_needle(line, ";")) { + if (line.find('#') != std::string::npos) { + line.erase(line.find('#')); + } else if (line.find(';') != std::string::npos) { + line.erase(line.find(';')); + } else { + 
/// does the line contains valid input? + if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + } + } + + return err_str; + } + + if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) { + err_str = "Line contains non alphanumeric characters.\nhere -> "; + err_str += line; + + return err_str; + } + + // check for a valid instruction format. + + if (line.find(',') != std::string::npos) { + if (line.find(',') + 1 == line.size()) { + err_str += "\nInstruction lacks right register, here -> "; + err_str += line.substr(line.find(',')); + + return err_str; + } else { + bool nothing_on_right = true; + + if (line.find(',') + 1 > line.size()) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + + auto substr = line.substr(line.find(',') + 1); + + for (auto& ch : substr) { + if (ch != ' ' && ch != '\t') { + nothing_on_right = false; + } + } + + // this means we found nothing after that ',' . + if (nothing_on_right) { + err_str += "\nInstruction not complete, here -> "; + err_str += line; + + return err_str; + } + } + } + + // these do take an argument. + std::vector operands_inst = {"stw", "li"}; + + // these don't. + std::vector filter_inst = {"blr", "bl", "sc"}; + + for (auto& opcode_risc : kOpcodesPowerPC) { + if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { + for (auto& op : operands_inst) { + // if only the instruction was found. 
+ if (line == op) { + err_str += "\nMalformed "; + err_str += op; + err_str += " instruction, here -> "; + err_str += line; + } + } + + // if it is like that -> addr1, 0x0 + if (auto it = std::find(filter_inst.begin(), filter_inst.end(), opcode_risc.name); + it == filter_inst.cend()) { + if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { + if (!isspace(line[line.find(opcode_risc.name) + strlen(opcode_risc.name)])) { + err_str += "\nMissing space between "; + err_str += opcode_risc.name; + err_str += " and operands.\nhere -> "; + err_str += line; + } + } + } + + return err_str; + } + } + + err_str += "Unrecognized instruction: " + line; + + return err_str; +} + +bool CompilerKit::EncoderPowerPC::WriteNumber(const std::size_t& pos, std::string& jump_label) { + if (!isdigit(jump_label[pos])) return false; + + switch (jump_label[pos + 1]) { + case 'x': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid hex number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_hex"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 16)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "POWER: found a base 16 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; + } + case 'b': { + if (auto res = strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid binary number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_bin"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 2)); + + if (kVerbose) { + kStdOut << "POWER: found a base 2 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + case 'o': { + if (auto res = strtol(jump_label.substr(pos + 
2).c_str(), nullptr, 7); !res) { + if (errno != 0) { + CompilerKit::Detail::print_error("invalid octal number: " + jump_label, "CompilerKit"); + throw std::runtime_error("invalid_octal"); + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos + 2).c_str(), nullptr, 7)); + + if (kVerbose) { + kStdOut << "POWER: found a base 8 number here: " << jump_label.substr(pos) << "\n"; + } + + for (char& i : num.number) { + kBytes.push_back(i); + } + + return true; + } + default: { + break; + } + } + + /* check for errno and stuff like that */ + if (auto res = strtol(jump_label.substr(pos).c_str(), nullptr, 10); !res) { + if (errno != 0) { + return false; + } + } + + CompilerKit::NumberCast64 num(strtol(jump_label.substr(pos).c_str(), nullptr, 10)); + + for (char& i : num.number) { + kBytes.push_back(i); + } + + if (kVerbose) { + kStdOut << "POWER: found a base 10 number here: " << jump_label.substr(pos) << "\n"; + } + + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/// @brief Read and write an instruction to the output array. + +///////////////////////////////////////////////////////////////////////////////////////// + +bool CompilerKit::EncoderPowerPC::WriteLine(std::string line, std::string file) { + if (CompilerKit::ast_find_needle(line, "public_segment")) return false; + if (!CompilerKit::Detail::algorithm::is_valid_power64(line)) return false; + + for (auto& opcode_risc : kOpcodesPowerPC) { + // strict check here + if (CompilerKit::ast_find_needle(line, opcode_risc.name)) { + std::string name(opcode_risc.name); + std::string jump_label, cpy_jump_label; + std::vector found_registers_index; + + // check funct7 type. 
+ switch (opcode_risc.ops->type) { + default: { + NumberCast32 num(opcode_risc.opcode); + + for (auto ch : num.number) { + kBytes.emplace_back(ch); + } + break; + } + case BADDR: + case PCREL: { + auto num = GetNumber32(line, name); + + kBytes.emplace_back(num.number[0]); + kBytes.emplace_back(num.number[1]); + kBytes.emplace_back(num.number[2]); + kBytes.emplace_back(0x48); + + break; + } + /// General purpose, float, vector operations. Everything that involve + /// registers. + case G0REG: + case FREG: + case VREG: + case GREG: { + // \brief how many registers we found. + std::size_t found_some_count = 0UL; + std::size_t register_count = 0UL; + std::string opcodeName = opcode_risc.name; + std::size_t register_sum = 0; + + NumberCast64 num(opcode_risc.opcode); + + for (size_t line_index = 0UL; line_index < line.size(); line_index++) { + if (line[line_index] == kAsmRegisterPrefix[0] && isdigit(line[line_index + 1])) { + std::string register_syntax = kAsmRegisterPrefix; + register_syntax += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) register_syntax += line[line_index + 2]; + + std::string reg_str; + reg_str += line[line_index + 1]; + + if (isdigit(line[line_index + 2])) reg_str += line[line_index + 2]; + + // it ranges from r0 to r19 + // something like r190 doesn't exist in the instruction set. 
+ if (isdigit(line[line_index + 3]) && isdigit(line[line_index + 2])) { + reg_str += line[line_index + 3]; + CompilerKit::Detail::print_error( + "invalid register index, r" + reg_str + + "\nnote: The POWER accepts registers from r0 to r32.", + file); + throw std::runtime_error("invalid_register_index"); + } + + // finally cast to a size_t + std::size_t reg_index = strtol(reg_str.c_str(), nullptr, 10); + + if (reg_index > kAsmRegisterLimit) { + CompilerKit::Detail::print_error("invalid register index, r" + reg_str, file); + throw std::runtime_error("invalid_register_index"); + } + + if (opcodeName == "li") { + char numIndex = 0; + + for (size_t i = 0; i != reg_index; i++) { + numIndex += 0x20; + } + + auto num = GetNumber32(line, reg_str); + + kBytes.push_back(num.number[0]); + kBytes.push_back(num.number[1]); + kBytes.push_back(numIndex); + kBytes.push_back(0x38); + + // check if bigger than two. + for (size_t i = 2; i < 4; i++) { + if (num.number[i] > 0) { + CompilerKit::Detail::print_warning("number overflow on li operation.", file); + break; + } + } + + break; + } + + if ((opcodeName[0] == 's' && opcodeName[1] == 't')) { + if (register_sum == 0) { + for (size_t indexReg = 0UL; indexReg < reg_index; ++indexReg) { + register_sum += 0x20; + } + } else { + register_sum += reg_index; + } + } + + if (opcodeName == "mr") { + switch (register_count) { + case 0: { + kBytes.push_back(0x78); + + char numIndex = 0x3; + + for (size_t i = 0; i != reg_index; i++) { + numIndex += 0x8; + } + + kBytes.push_back(numIndex); + + break; + } + case 1: { + char numIndex = 0x1; + + for (size_t i = 0; i != reg_index; i++) { + numIndex += 0x20; + } + + for (size_t i = 0; i != reg_index; i++) { + kBytes[kBytes.size() - 1] += 0x8; + } + + kBytes[kBytes.size() - 1] -= 0x8; + + kBytes.push_back(numIndex); + + if (reg_index >= 10 && reg_index < 20) + kBytes.push_back(0x7d); + else if (reg_index >= 20 && reg_index < 30) + kBytes.push_back(0x7e); + else if (reg_index >= 30) + 
kBytes.push_back(0x7f); + else + kBytes.push_back(0x7c); + + break; + } + default: + break; + } + + ++register_count; + ++found_some_count; + } + + if (opcodeName == "addi") { + if (found_some_count == 2 || found_some_count == 0) + kBytes.emplace_back(reg_index); + else if (found_some_count == 1) + kBytes.emplace_back(0x00); + + ++found_some_count; + + if (found_some_count > 3) { + CompilerKit::Detail::print_error("Too much registers. -> " + line, file); + throw std::runtime_error("too_much_regs"); + } + } + + if (opcodeName.find("cmp") != std::string::npos) { + ++found_some_count; + + if (found_some_count > 3) { + CompilerKit::Detail::print_error("Too much registers. -> " + line, file); + throw std::runtime_error("too_much_regs"); + } + } + + if (opcodeName.find("mf") != std::string::npos || + opcodeName.find("mt") != std::string::npos) { + char numIndex = 0; + + for (size_t i = 0; i != reg_index; i++) { + numIndex += 0x20; + } + + num.number[2] += numIndex; + + ++found_some_count; + + if (found_some_count > 1) { + CompilerKit::Detail::print_error("Too much registers. 
-> " + line, file); + throw std::runtime_error("too_much_regs"); + } + + if (kVerbose) { + kStdOut << "POWER: Found register: " << register_syntax << "\n"; + kStdOut << "POWER: Amount of registers in instruction: " << found_some_count + << "\n"; + } + + if (reg_index >= 10 && reg_index < 20) + num.number[3] = 0x7d; + else if (reg_index >= 20 && reg_index < 30) + num.number[3] = 0x7e; + else if (reg_index >= 30) + num.number[3] = 0x7f; + else + num.number[3] = 0x7c; + + for (auto ch : num.number) { + kBytes.emplace_back(ch); + } + } + + found_registers_index.push_back(reg_index); + } + } + + if (opcodeName == "addi") { + kBytes.emplace_back(0x38); + } + + if (opcodeName.find("cmp") != std::string::npos) { + char rightReg = 0x0; + + for (size_t i = 0; i != found_registers_index[1]; i++) { + rightReg += 0x08; + } + + kBytes.emplace_back(0x00); + kBytes.emplace_back(rightReg); + kBytes.emplace_back(found_registers_index[0]); + kBytes.emplace_back(0x7c); + } + + if ((opcodeName[0] == 's' && opcodeName[1] == 't')) { + size_t offset = 0UL; + + if (line.find('+') != std::string::npos) { + auto number = GetNumber32(line.substr(line.find("+")), "+"); + offset = number.raw; + } + + kBytes.push_back(offset); + kBytes.push_back(0x00); + kBytes.push_back(register_sum); + + kBytes.emplace_back(0x90); + } + + if (opcodeName == "mr") { + if (register_count == 1) { + CompilerKit::Detail::print_error("Too few registers. -> " + line, file); + throw std::runtime_error("too_few_registers"); + } + } + + // we're not in immediate addressing, reg to reg. + if (opcode_risc.ops->type != GREG) { + // remember! register to register! 
+ if (found_some_count == 1) { + CompilerKit::Detail::print_error( + "Unrecognized register found.\ntip: each POWER register " + "starts with 'r'.\nline: " + + line, + file); + + throw std::runtime_error("not_a_register"); + } + } + + if (found_some_count < 1 && name[0] != 'l' && name[0] != 's') { + CompilerKit::Detail::print_error( + "invalid combination of opcode and registers.\nline: " + line, file); + throw std::runtime_error("invalid_comb_op_reg"); + } + + break; + } + } + + kOrigin += kPowerIPAlignment; + break; + } + } + + return true; +} + +// Last rev 13-1-24 diff --git a/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cc b/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cc deleted file mode 100644 index 9a54142..0000000 --- a/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#include - -/** - * @file AssemblyFactory.cc - * @author Amlal El Mahrouss (amlal@nekernel.org) - * @brief Nectar Code Generation API - * @version 0.0.3 - * @copyright Copyright (c) 2024-2026 Amlal El Mahrouss - * - */ - -namespace CompilerKit { -///! @brief Compile for specific format (ELF, PEF, AE) -Int32 AssemblyFactory::Compile(STLString sourceFile, const Int32& arch) { - if (sourceFile.length() < 1) return NECTAR_UNIMPLEMENTED; - - if (!this->fMounted) return NECTAR_UNIMPLEMENTED; - if (arch != this->fMounted->Arch()) return NECTAR_INVALID_ARCH; - - return this->fMounted->CompileToFormat(sourceFile, arch); -} - -///! @brief mount assembly backend. -void AssemblyFactory::Mount(WeakRef mount_ptr) { - if (mount_ptr && !this->fMounted) { - this->fMounted = mount_ptr.Leak(); - } -} - -///! @brief Unmount assembler. 
-WeakRef AssemblyFactory::Unmount() noexcept { - auto mount_prev = fMounted; - - if (this->fMounted) { - this->fMounted = nullptr; - } - - return WeakRef{mount_prev}; -} -} // namespace CompilerKit diff --git a/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cpp b/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cpp new file mode 100644 index 0000000..9a54142 --- /dev/null +++ b/src/CompilerKit/src/CodeGenerator+AssemblyFactory.cpp @@ -0,0 +1,45 @@ +// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#include + +/** + * @file AssemblyFactory.cc + * @author Amlal El Mahrouss (amlal@nekernel.org) + * @brief Nectar Code Generation API + * @version 0.0.3 + * @copyright Copyright (c) 2024-2026 Amlal El Mahrouss + * + */ + +namespace CompilerKit { +///! @brief Compile for specific format (ELF, PEF, AE) +Int32 AssemblyFactory::Compile(STLString sourceFile, const Int32& arch) { + if (sourceFile.length() < 1) return NECTAR_UNIMPLEMENTED; + + if (!this->fMounted) return NECTAR_UNIMPLEMENTED; + if (arch != this->fMounted->Arch()) return NECTAR_INVALID_ARCH; + + return this->fMounted->CompileToFormat(sourceFile, arch); +} + +///! @brief mount assembly backend. +void AssemblyFactory::Mount(WeakRef mount_ptr) { + if (mount_ptr && !this->fMounted) { + this->fMounted = mount_ptr.Leak(); + } +} + +///! @brief Unmount assembler. 
+WeakRef AssemblyFactory::Unmount() noexcept { + auto mount_prev = fMounted; + + if (this->fMounted) { + this->fMounted = nullptr; + } + + return WeakRef{mount_prev}; +} +} // namespace CompilerKit diff --git a/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cc b/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cc deleted file mode 100644 index 5b246f2..0000000 --- a/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cc +++ /dev/null @@ -1,1749 +0,0 @@ -// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// BUGS: 0 - -/////////////////////// - -// ANSI ESCAPE CODES // - -/////////////////////// - -/////////////////////// - -// MACROS // - -/////////////////////// - -#include -#include -#include -#include -#include - -/* NeKernel NECTAR Compiler Driver. */ -/* This is part of the CompilerKit. */ -/* (c) Amlal El Mahrouss 2024-2025 */ - -/// @author Amlal El Mahrouss (amlal@nekernel.org) -/// @file NectarCompiler+AMD64.cc -/// @brief NECTAR Compiler Driver. - -///////////////////////////////////// - -// INTERNALS OF THE NECTAR COMPILER - -///////////////////////////////////// - -/// @CompilerKit -/// @brief Avoids relative_path which could discard parts of the original. -std::filesystem::path nectar_expand_home(const std::filesystem::path& input) { - const std::string& raw = input.string(); - - if (!raw.empty() && raw[0] == '~') { - const char* home = std::getenv("HOME"); - if (!home) home = std::getenv("USERPROFILE"); - - if (!home) throw std::runtime_error("Home directory not found in environment variables"); - - return std::filesystem::path(home) / raw.substr(1); - } - - return input; -} - -/// \brief Register map, i.e ({foobar, rbp+48}, etc...) 
-struct CompilerRegisterMap final { - CompilerKit::STLString fName{}; - CompilerKit::STLString fReg{}; -}; - -/// \brief Offsets of struct and classes. -struct CompilerStructMap final { - CompilerKit::STLString fName{}; - CompilerKit::STLString fReg{}; - std::vector> fOffsets; -}; - -/// \brief State machine of the compiler. -struct CompilerState final { - std::vector fStackMapVector; - std::vector fStructMapVector; - CompilerKit::STLString fLastFile{}; - CompilerKit::STLString fLastError{}; -}; - -static CompilerState kState; - -/// \brief Embed Scope of a class. -static Int32 kOnClassScope = 0; - -///////////////////////////////////////////////////////////////////////////////////////// - -// NEW DATA STRUCTURES FOR NECTAR SUPPORT - -///////////////////////////////////////////////////////////////////////////////////////// - -/// \brief Scope kind enumeration -enum class ScopeKind { - kScopeGlobal, - kScopeNamespace, - kScopeClass, - kScopeFunction, -}; - -/// \brief Variable location enumeration -enum class VarLocation { - kRegister, - kStack, - kStackSpill, -}; - -/// \brief Compiler scope information -struct CompilerScope { - ScopeKind fKind{ScopeKind::kScopeGlobal}; - CompilerKit::STLString fName{}; - CompilerKit::STLString fMangledPrefix{}; -}; - -/// \brief Extended variable information -struct VariableInfo { - CompilerKit::STLString fName{}; - VarLocation fLocation{VarLocation::kRegister}; - Int32 fStackOffset{0}; - CompilerKit::STLString fRegister{}; - Int32 fSize{8}; - bool fIsParameter{false}; - CompilerKit::STLString fTypeName{}; - UInt32 fLastUsed{0}; - bool fIsConstant{false}; -}; - -/// \brief Complete compiler context -struct CompilerContext { - std::vector fScopeStack; - std::vector fVariables; - std::vector fStructMapVector; - CompilerKit::STLString fLastFile{}; - CompilerKit::STLString fLastError{}; - Int32 fStackOffset{0}; - Int32 fMaxStackUsed{0}; - UInt32 fInstructionCounter{0}; -}; - -/// \brief Global compiler context (replaces kState) 
-static CompilerContext kContext; - -///////////////////////////////////////////////////////////////////////////////////////// - -/// \brief Target architecture. -/// \note This shall never change. -static Int32 kMachine = CompilerKit::AssemblyFactory::kArchAMD64; - -///////////////////////////////////////// - -// ARGUMENT REGISTERS (R8, R15) - -///////////////////////////////////////// - -static std::vector kKeywords; - -///////////////////////////////////////// - -// COMPILER PARSING UTILITIES/STATES. - -///////////////////////////////////////// - -static CompilerKit::AssemblyFactory kAssembler; -static bool kInStruct = false; -static bool kOnWhileLoop = false; -static bool kOnForLoop = false; -static bool kInBraces = false; -static size_t kBracesCount = 0UL; - -/// \brief NASM output support: track defined and external symbols -static std::set kDefinedSymbols; -static std::set kExternalSymbols; - -///////////////////////////////////////////////////////////////////////////////////////// - -// HELPER FUNCTION DECLARATIONS - -///////////////////////////////////////////////////////////////////////////////////////// - -// Scope management -static void nectar_push_scope(ScopeKind kind, const CompilerKit::STLString& name); -static void nectar_pop_scope(); - -// Name mangling -static std::vector nectar_extract_function_args( - const CompilerKit::STLString& text); -static CompilerKit::STLString nectar_mangle_name( - const CompilerKit::STLString& identifier, const std::vector& args = {}); - -// Stack frame management -static CompilerKit::STLString nectar_generate_prologue(); -static CompilerKit::STLString nectar_generate_epilogue(); -static Int32 nectar_allocate_stack_variable(const CompilerKit::STLString& var_name, Int32 size = 8, - bool is_constant = false); - -// Register allocation -static CompilerKit::STLString nectar_allocate_register(const CompilerKit::STLString& var_name); -static CompilerKit::STLString nectar_spill_lru_variable(); -static VariableInfo* 
nectar_find_variable(const CompilerKit::STLString& var_name); -static CompilerKit::STLString nectar_get_variable_ref(const CompilerKit::STLString& var_name, - bool lookup = false); - -// Impl management -static void nectar_add_impl_member(const CompilerKit::STLString& class_name, - const CompilerKit::STLString& member_name, Int32 size); -static Int32 nectar_get_impl_size(const CompilerKit::STLString& class_name); -static CompilerKit::STLString nectar_generate_constructor_call( - const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name); -static CompilerKit::STLString nectar_generate_destructor_call( - const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name); - -// PEF calling convention -static void nectar_process_function_parameters(const std::vector& args); - -///////////////////////////////////////////////////////////////////////////////////////// - -/* \brief NECTAR compiler backend for the NeKernel NECTAR driver */ -class CompilerFrontendNectarAMD64 final CK_COMPILER_FRONTEND { - public: - explicit CompilerFrontendNectarAMD64() = default; - ~CompilerFrontendNectarAMD64() override = default; - - NECTAR_COPY_DEFAULT(CompilerFrontendNectarAMD64); - - /// \brief Parse Nectar symbols and syntax. - CompilerKit::SyntaxLeafList::SyntaxLeaf Compile(CompilerKit::STLString& text, - const CompilerKit::STLString& file) override; - - /// \brief Returns the language name. - /// \return Language name. - const char* Language() override; - - public: - /// \brief Parse NECTAR namespaces and Impls. - /// \param CompilerKit::SyntaxLeafList::SyntaxLeaf the leaf to build upon. 
- CompilerKit::SyntaxLeafList::SyntaxLeaf CompileLayout(CompilerKit::STLString& text, - const CompilerKit::STLString& file, - CompilerKit::SyntaxLeafList::SyntaxLeaf&); -}; - -/// @internal compiler variables - -static CompilerFrontendNectarAMD64* kFrontend = nullptr; - -static std::vector kRegisterList = { - "rbx", "rsi", "r10", "r11", "r12", "r13", "r14", "r15", "xmm12", "xmm13", "xmm14", "xmm15", -}; - -/// @brief The PEF calling convention (caller must save rax, rbp) -/// @note callee must return via **rax**. -/// @note caller must read **rax** to grab return value. -static std::vector kRegisterConventionCallList = { - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", -}; - -static std::size_t kFunctionEmbedLevel{}; -static CompilerKit::STLString kCurrentIfSymbol{}; -static CompilerKit::STLString kCurrentReturnAddress{}; -static bool kCurrentIfCondition{false}; - -/// detail namespaces - -const char* CompilerFrontendNectarAMD64::Language() { - return "NeKernel Nectar"; -} - -static std::uintptr_t kOrigin = kPefBaseOrigin; -static std::vector> kOriginMap; - -///////////////////////////////////////////////////////////////////////////////////////// - -/// @name Compile -/// @brief Generate assembly from a NECTAR source. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -static auto nectar_get_impl_member(const CompilerKit::STLString& class_name, - const CompilerKit::STLString& member_name) { - // Find or create struct map entry - for (auto& sm : kContext.fStructMapVector) { - if (sm.fName == class_name) { - return sm; - } - } - - return CompilerStructMap{}; -} - -CompilerKit::SyntaxLeafList::SyntaxLeaf CompilerFrontendNectarAMD64::Compile( - CompilerKit::STLString& text, const CompilerKit::STLString& file) { - CompilerKit::SyntaxLeafList::SyntaxLeaf syntax_tree; - CompilerKit::STLString syntax_rem_buffer; - - if (text.empty()) return syntax_tree; - - std::size_t index{}; - std::vector> keywords_list; - - for (auto& keyword : kKeywords) { - if (text.find(keyword.fKeywordName) != std::string::npos) { - switch (keyword.fKeywordKind) { - default: - break; - } - - std::size_t pos = text.find(keyword.fKeywordName); - if (pos == std::string::npos) continue; - - // can't go before start of string - if (pos > 0 && text[pos - 1] == '+' && - keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign) - continue; - - if (pos > 0 && text[pos - 1] == '-' && - keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign) - continue; - - // don't go out of range - if ((pos + keyword.fKeywordName.size()) < text.size() && - text[pos + keyword.fKeywordName.size()] == '=' && - keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign) - continue; - - keywords_list.emplace_back(std::make_pair(keyword, index)); - ++index; - } - } - - for (auto& keyword : keywords_list) { - if (text.find(keyword.first.fKeywordName) == CompilerKit::STLString::npos) continue; - - switch (keyword.first.fKeywordKind) { - case CompilerKit::KeywordKind::kKeywordKindImpl: { - ++kOnClassScope; - break; - } - case CompilerKit::KeywordKind::kKeywordKindIf: { - kCurrentIfCondition = true; - - std::size_t keywordPos = 
text.find(keyword.first.fKeywordName); - std::size_t openParen = text.find("("); - std::size_t closeParen = text.find("):"); - - if (keywordPos == CompilerKit::STLString::npos || - openParen == CompilerKit::STLString::npos || - closeParen == CompilerKit::STLString::npos || closeParen <= openParen) { - CompilerKit::Detail::print_error("Malformed if expression: " + text, file); - break; - } - - auto left = text.substr(openParen + 1, closeParen - openParen - 1); - - while (left.find(" ") != CompilerKit::STLString::npos) { - left.erase(left.find(" "), 1); - } - - std::vector> operators = { - {"=:", "jne"}, - {"!=:", "je"}, - {">:", "jl"}, - {"<:", "jg"}, - }; - - for (auto& op : operators) { - if (left.find(op.first) == CompilerKit::STLString::npos) continue; - - auto right = left.substr(left.find(op.first) + op.first.size()); - - if (auto res = right.find(":"); res != CompilerKit::STLString::npos) right.erase(res); - - auto tmp = left.substr(0, left.find(op.first)); - - while (tmp.find(" ") != CompilerKit::STLString::npos) tmp.erase(tmp.find(" "), 1); - - while (right.find(" ") != CompilerKit::STLString::npos) right.erase(right.find(" "), 1); - - if (auto var = nectar_find_variable(tmp); var) { - syntax_tree.fUserValue += - "mov rdi, qword [rbp+" + std::to_string(-var->fStackOffset) + "]\n"; - delete var; - } else { - if (!isnumber(tmp[0])) { - CompilerKit::Detail::print_warning("Variable not found, treating as symbol: " + tmp, - file); - } - - syntax_tree.fUserValue += "mov rdi, " + tmp + "\n"; - } - - if (auto var = nectar_find_variable(right); var) { - syntax_tree.fUserValue += - "mov rsi, qword [rbp+" + std::to_string(-var->fStackOffset) + "]\n"; - delete var; - } - - else { - if (!isnumber(right[0])) { - CompilerKit::Detail::print_warning("Variable not found, treating as symbol: " + right, - file); - } - - syntax_tree.fUserValue += "mov rsi, " + right + "\n"; - } - - syntax_tree.fUserValue += "cmp rdi, rsi\n"; - - syntax_tree.fUserValue += - op.second + " __ret_" 
+ std::to_string(kOrigin) + "_" + kCurrentIfSymbol + "\n"; - - kCurrentIfSymbol = std::to_string(kOrigin) + "_" + kCurrentIfSymbol; - - ++kOrigin; - } - - break; - } - case CompilerKit::KeywordKind::kKeywordKindImplInit: { - if (text.find(":= ") == CompilerKit::STLString::npos) - CompilerKit::Detail::print_error("Invalid invokation of Init.", file); - - auto res = text.substr(text.find(":= ") + strlen(":= ")); - - if (auto tmp = res.find("{}"); tmp) { - if (tmp == CompilerKit::STLString::npos) { - break; - } - - res.erase(tmp); - } - - syntax_tree.fUserValue += "call __NECTAR_M_" + res + "\n"; - break; - } - case CompilerKit::KeywordKind::kKeywordKindFunctionStart: { - for (auto& ch : text) { - if (isnumber(ch)) { - goto dont_accept_func; - } - } - - goto accept_func; - - dont_accept_func: - break; - - accept_func: { - if (kFunctionEmbedLevel > 0) - CompilerKit::Detail::print_error("Clojures are a work in progress feature.", file); - - CompilerKit::STLString symbol_name_fn = text; - size_t indexFnName = 0; - - // this one is for the type. 
- for (auto& ch : text) { - ++indexFnName; - - if (ch == '\t') break; - if (ch == ' ') break; - } - - symbol_name_fn = text.substr(indexFnName); - - if (text.find("return ") != CompilerKit::STLString::npos) { - text.erase(0, text.find("return ")); - break; - } - - // Check if this is a function call (ends with ;) - if (text.ends_with(");")) { - // Handle function call/jump - auto it = std::find_if( - kOriginMap.begin(), kOriginMap.end(), - [&symbol_name_fn](std::pair pair) -> bool { - return symbol_name_fn.find(pair.first) != CompilerKit::STLString::npos; - }); - - if (it != kOriginMap.end()) { - std::stringstream ss; - ss << std::hex << it->second; - - syntax_tree.fUserValue += "jmp " + ss.str() + "\n"; - } - break; - } - - indexFnName = 0; - - // Extract clean function name - CompilerKit::STLString cleanFnName = symbol_name_fn; - - if (cleanFnName.find("(") != CompilerKit::STLString::npos) { - cleanFnName = cleanFnName.substr(0, cleanFnName.find("(")); - } - - // Remove whitespace/tabs - while (!cleanFnName.empty() && (cleanFnName.back() == ' ' || cleanFnName.back() == '\t')) { - cleanFnName.pop_back(); - } - while (!cleanFnName.empty() && - (cleanFnName.front() == ' ' || cleanFnName.front() == '\t')) { - cleanFnName.erase(0, 1); - } - - // Extract function arguments - auto args = nectar_extract_function_args(text); - - // Generate mangled name - auto mangled_name = nectar_mangle_name(cleanFnName, args); - - // Generate function label and prologue - - while (mangled_name.find(" ") != CompilerKit::STLString::npos) { - mangled_name.erase(mangled_name.find(" "), 1); - } - - // Track defined symbol for NASM extern resolution - kDefinedSymbols.insert(mangled_name); - - if (mangled_name.starts_with("__NECTAR") == false) { - mangled_name = "_" + mangled_name; - } - - if (!kNasmOutput) - syntax_tree.fUserValue += "public_segment .code64 " + mangled_name + "\n"; - else - syntax_tree.fUserValue += - "section .text\nglobal " + mangled_name + "\n" + mangled_name + ":\n"; - - 
syntax_tree.fUserValue += nectar_generate_prologue(); - - // Initialize function-local state - kContext.fVariables.clear(); - kContext.fStackOffset = 0; - kContext.fMaxStackUsed = 0; - - // Process function parameters - nectar_process_function_parameters(args); - - // Push function scope - nectar_push_scope(ScopeKind::kScopeFunction, cleanFnName); - - ++kFunctionEmbedLevel; - - kOriginMap.push_back({mangled_name, kOrigin}); - ++kOrigin; - - break; - } - } - case CompilerKit::KeywordKind::kKeywordKindFunctionEnd: { - if (kOnClassScope) --kOnClassScope; - - if (text.ends_with(";")) break; - - if (kFunctionEmbedLevel) { - --kFunctionEmbedLevel; - } - - // Pop function scope - nectar_pop_scope(); - - break; - } - case CompilerKit::KeywordKind::kKeywordKindDelete: { - if (auto pos = syntax_tree.fUserValue.find(keyword.first.fKeywordName); - pos != CompilerKit::STLString::npos) - if (!kNasmOutput) { - syntax_tree.fUserValue.replace(pos, keyword.first.fKeywordName.size(), - "__operator_delete"); - } - continue; - } - case CompilerKit::KeywordKind::kKeywordKindNew: { - if (auto pos = syntax_tree.fUserValue.find(keyword.first.fKeywordName); - pos != CompilerKit::STLString::npos) { - if (!kNasmOutput) { - syntax_tree.fUserValue.replace(pos, keyword.first.fKeywordName.size(), - "__operator_new"); - } - } - - continue; - } - case CompilerKit::KeywordKind::kKeywordKindAccess: - case CompilerKit::KeywordKind::kKeywordKindFunctionAccess: - case CompilerKit::KeywordKind::kKeywordKindAccessChecked: { - if (text.find("return ") != CompilerKit::STLString::npos) { - break; - } - - if (text.find("if ") != CompilerKit::STLString::npos) { - break; - } - - if (text.find("const ") != CompilerKit::STLString::npos) { - break; - } - - if (text.find("let ") != CompilerKit::STLString::npos) { - break; - } - - if (text.find("):") != CompilerKit::STLString::npos) { - break; - } - - CompilerKit::STLString valueOfVar = - text.substr(text.find(keyword.first.fKeywordName) + 
keyword.first.fKeywordName.size()); - - CompilerKit::STLString args; - - if (valueOfVar.find("{") != CompilerKit::STLString::npos) { - break; - } - - if (CompilerKit::KeywordKind::kKeywordKindFunctionAccess == keyword.first.fKeywordKind) - args = text.substr(text.find(keyword.first.fKeywordName)); - else - args = valueOfVar.substr(valueOfVar.find("(") + 1); - - auto nameVar = text.substr(0, text.find(keyword.first.fKeywordName)); - - while (nameVar.find(" ") != CompilerKit::STLString::npos) { - nameVar.erase(nameVar.find(" "), 1); - } - - while (nameVar.find("\t") != CompilerKit::STLString::npos) { - nameVar.erase(nameVar.find("\t"), 1); - } - - auto method = text.substr(0, text.find(keyword.first.fKeywordName)); - - if (method.find("let ") != CompilerKit::STLString::npos) { - method.erase(0, method.find("let ") + strlen("let ")); - } else if (method.find("const ") != CompilerKit::STLString::npos) { - method.erase(0, method.find("const ") + strlen("const ")); - } - - if (method.find(":=") != CompilerKit::STLString::npos) { - method.erase(0, method.find(":=") + strlen(":=")); - } - - while (method.find(" ") != CompilerKit::STLString::npos) { - method.erase(method.find(" "), 1); - } - - if (!nectar_get_variable_ref(nameVar).empty()) - syntax_tree.fUserValue += "lea r8, " + nectar_get_variable_ref(nameVar) + "\n"; - - if (CompilerKit::KeywordKind::kKeywordKindFunctionAccess != keyword.first.fKeywordKind) - method = valueOfVar.erase(valueOfVar.find("(")); - - valueOfVar += "\n"; - - CompilerKit::STLString arg; - auto index = 9; - auto cnter = 0; - - CompilerKit::STLString buf; - - for (auto& ch : args) { - if (ch == ',' || ch == ')') { - if (index <= 15) { - auto val = nectar_get_variable_ref(arg); - - if (val.empty()) { - val = arg; - - while (val.find(" ") != CompilerKit::STLString::npos) { - val.erase(val.find(" "), 1); - } - - if (!isnumber(val[0])) { - val = "0x0"; - } - } - - if (!arg.empty()) buf += "mov r" + std::to_string(index) + ", " + val + "\n"; - - 
arg.clear(); - ++index; - ++cnter; - } - - continue; - } - - arg += ch; - } - - if (!nectar_get_variable_ref(nameVar).empty()) { - if (!kNasmOutput) { - syntax_tree.fUserValue += buf; - syntax_tree.fUserValue += "call "; - syntax_tree.fUserValue += - (keyword.first.fKeywordName.ends_with('>') ? " __ptr __offset " : " __offset ") + - nectar_get_variable_ref(nameVar) + method + "\n"; - } else { - // NASM: Generate standard call through computed address - auto varRef = nectar_get_variable_ref(nameVar); - - if (keyword.first.fKeywordName.ends_with('>')) { - // Pointer dereference: load pointer then call through it - syntax_tree.fUserValue += "mov rax, " + varRef + "\n"; - syntax_tree.fUserValue += "call [rax + " + method + "]\n"; - } else { - // Direct offset call - syntax_tree.fUserValue += "lea rax, " + varRef + "\n"; - syntax_tree.fUserValue += "call [rax + " + method + "]\n"; - } - } - } else { - auto res = buf; - if (method.starts_with("__NECTAR") == false) - res += "call _" + method + "\n"; - else - res += "call " + method + "\n"; - - res += syntax_rem_buffer; - - syntax_tree.fUserValue += res; - } - - break; - } - case CompilerKit::KeywordKind::kKeywordKindEndLine: - case CompilerKit::KeywordKind::kKeywordKindVariableInc: - case CompilerKit::KeywordKind::kKeywordKindVariableDec: - case CompilerKit::KeywordKind::kKeywordKindVariableAssign: { - CompilerKit::STLString valueOfVar = ""; - - if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) { - valueOfVar = text.substr(text.find("+=") + 2); - } else if (keyword.first.fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariableDec) { - valueOfVar = text.substr(text.find("-=") + 2); - } else if (keyword.first.fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariableAssign) { - valueOfVar = text.substr(text.find(keyword.first.fKeywordName) + - keyword.first.fKeywordName.size()); - } else if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) { - 
break; - } - - if (valueOfVar.empty()) { - CompilerKit::Detail::print_error("Undefined Right-Value for variable", file); - } - - while (valueOfVar.find(";") != CompilerKit::STLString::npos && - keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindEndLine) { - valueOfVar.erase(valueOfVar.find(";")); - } - - CompilerKit::STLString varName = text; - - if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) { - varName.erase(varName.find("+=")); - } else if (keyword.first.fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariableDec) { - varName.erase(varName.find("-=")); - } else if (keyword.first.fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariableAssign) { - varName.erase(varName.find(keyword.first.fKeywordName)); - } else if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) { - varName.erase(varName.find(";")); - } - - static bool typeFound = false; - - for (auto& keyword : kKeywords) { - if (keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariable) { - if (text.find(keyword.fKeywordName) != CompilerKit::STLString::npos) { - if (text[text.find(keyword.fKeywordName)] == ' ') { - typeFound = false; - continue; - } - - typeFound = true; - } - } - } - - CompilerKit::STLString instr = "mov "; - - std::vector newVars; - - if (typeFound && - keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindVariableInc && - keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindVariableDec) { - // Remove whitespace only (keep operators and quotes) - while (!valueOfVar.empty() && (valueOfVar[0] == ' ' || valueOfVar[0] == '\t')) { - valueOfVar.erase(0, 1); - } - } - - if (keyword.second > 0 && kKeywords[keyword.second - 1].fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariable) { - syntax_tree.fUserValue += "\n"; - continue; - } - - if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) { - syntax_tree.fUserValue += "\n"; 
- continue; - } - - if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) { - instr = "add "; - } else if (keyword.first.fKeywordKind == - CompilerKit::KeywordKind::kKeywordKindVariableDec) { - instr = "sub "; - } - - CompilerKit::STLString varErrCpy = varName; - - std::size_t indxReg = 0UL; - - while (!valueOfVar.empty() && (valueOfVar[0] == ' ' || valueOfVar[0] == '\t')) { - valueOfVar.erase(0, 1); - } - - while (valueOfVar.find(" ") != CompilerKit::STLString::npos) { - valueOfVar.erase(valueOfVar.find(" "), 1); - } - - while (valueOfVar.find("\t") != CompilerKit::STLString::npos) { - valueOfVar.erase(valueOfVar.find("\t"), 1); - } - - auto pos = 0; - - if (varName.find("let ") != CompilerKit::STLString::npos) { - pos = varName.find("let "); - varName = varName.substr(pos + std::string{"let "}.size()); - } - - while (varName.find(" ") != CompilerKit::STLString::npos) { - varName.erase(varName.find(" "), 1); - } - - while (varName.find("\t") != CompilerKit::STLString::npos) { - varName.erase(varName.find("\t"), 1); - } - - nectar_allocate_stack_variable(varName, 8, - text.find("const ") != CompilerKit::STLString::npos); - - CompilerKit::STLString mangled; - - if (valueOfVar.find(".") != CompilerKit::STLString::npos) { - if (!kNasmOutput) { - CompilerKit::STLString value = "__offset "; - valueOfVar.erase(0, valueOfVar.find(".") + strlen(".")); - valueOfVar.insert(0, value, value.size()); - } else { - valueOfVar.erase(0, valueOfVar.find(".") + strlen(".")); - } - - mangled = "__NECTAR_SM_"; - } - - if (valueOfVar.find("->") != CompilerKit::STLString::npos) { - if (!kNasmOutput) { - CompilerKit::STLString value = "__ptr __offset "; - valueOfVar.erase(0, valueOfVar.find("->") + strlen("->")); - valueOfVar.insert(0, value, value.size()); - } else { - valueOfVar.erase(0, valueOfVar.find("->") + strlen("->")); - } - mangled = "__NECTAR_RM_"; - } - - if (valueOfVar.find(")") != CompilerKit::STLString::npos) { - if (valueOfVar.find("(") != 
CompilerKit::STLString::npos) - valueOfVar.erase(valueOfVar.find("(")); - - if (!valueOfVar.empty()) { - // Track as potential external symbol for NASM - kExternalSymbols.insert(mangled + valueOfVar); - - if (!kNasmOutput) { - if (valueOfVar.ends_with(")") && - valueOfVar.find("->") != CompilerKit::STLString::npos || - valueOfVar.find(".") != CompilerKit::STLString::npos) - syntax_tree.fUserValue += instr + nectar_get_variable_ref(varName) + - ", __thiscall " + mangled + valueOfVar + "\n"; - else - syntax_tree.fUserValue += - instr + nectar_get_variable_ref(varName) + ", " + mangled + valueOfVar + "\n"; - } else { - syntax_rem_buffer = instr + nectar_get_variable_ref(varName) + ", rax\n"; - } - } - - break; - } - - if (valueOfVar.ends_with("{}")) valueOfVar = "rax"; // impl init returns back to rax. - - syntax_tree.fUserValue += - instr + nectar_get_variable_ref(varName) + ", " + valueOfVar + "\n"; - - break; - } - case CompilerKit::KeywordKind::kKeywordKindExport: { - auto tmp = text; - - if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";")); - - while (tmp.find(" ") != CompilerKit::STLString::npos) { - tmp.erase(tmp.find(" "), 1); - } - - if (!kNasmOutput) - syntax_tree.fUserValue += - "public_segment .code64 _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - else - syntax_tree.fUserValue += - "section .text\nglobal _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - - break; - } - case CompilerKit::KeywordKind::kKeywordKindImport: { - auto tmp = text; - - if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";")); - - while (tmp.find(" ") != CompilerKit::STLString::npos) { - tmp.erase(tmp.find(" "), 1); - } - - if (!kNasmOutput) - syntax_tree.fUserValue += - "extern_segment .zero64 _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - else - syntax_tree.fUserValue += - 
"section .data\nextern _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - - break; - } - case CompilerKit::KeywordKind::kKeywordKindExtern: { - auto tmp = text; - - if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";")); - - while (tmp.find(" ") != CompilerKit::STLString::npos) { - tmp.erase(tmp.find(" "), 1); - } - - if (!kNasmOutput) - syntax_tree.fUserValue += - "extern_segment .code64 _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - else - syntax_tree.fUserValue += - "section .text\nextern _" + - tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) + - "\n"; - - break; - } - case CompilerKit::KeywordKind::kKeywordKindReturn: { - try { - auto pos = text.find("return"); - - if (pos == CompilerKit::STLString::npos) { - syntax_tree.fUserValue += nectar_generate_epilogue(); - syntax_tree.fUserValue += "ret\n"; - ++kOrigin; - break; - } - - pos += std::string("return").size() + 1; - - CompilerKit::STLString subText = text.substr(pos); - - subText = subText.erase(subText.find(";")); - size_t indxReg = 0UL; - - // Extract and set up call arguments before erasing them - if (subText.find("):") != CompilerKit::STLString::npos) { - auto argStart = subText.find("(") + 1; - auto argEnd = subText.find("):"); - - if (argEnd != CompilerKit::STLString::npos && argEnd > argStart) { - auto argsStr = subText.substr(argStart, argEnd - argStart); - auto regIdx = 9; - - CompilerKit::STLString currentArg; - for (std::size_t i = 0; i <= argsStr.size(); ++i) { - if (i == argsStr.size() || argsStr[i] == ',') { - while (!currentArg.empty() && currentArg[0] == ' ') currentArg.erase(0, 1); - while (!currentArg.empty() && currentArg.back() == ' ') currentArg.pop_back(); - - if (!currentArg.empty() && regIdx <= 15) { - auto val = nectar_get_variable_ref(currentArg); - if (val.empty()) val = currentArg; - - syntax_tree.fUserValue += 
"mov r" + std::to_string(regIdx) + ", " + val + "\n"; - ++regIdx; - } - - currentArg.clear(); - } else { - currentArg += argsStr[i]; - } - } - } - - subText.erase(subText.find("(")); - } - - auto ref = nectar_get_variable_ref(subText); - - if (ref.empty() == false) syntax_tree.fUserValue += "lea rax, " + ref + "\n"; - - if (subText.starts_with("'") || isnumber(subText[0])) - syntax_tree.fUserValue += "mov rax, " + subText + "\n"; - else if (text.find("(") != CompilerKit::STLString::npos && - text.find(");") != CompilerKit::STLString::npos) { - // Track as potential external symbol for NASM. - - subText.erase(subText.find("(")); - - for (const auto& keyword : kKeywords) { - if (keyword.fKeywordName == subText) - CompilerKit::Detail::print_error("A nectar keyword cannot be used there.", file); - } - - kExternalSymbols.insert(subText); - - if (!kNasmOutput) { - syntax_tree.fUserValue += "mov rax, __call " + subText + "\n"; - } else { - // NASM: call function, result is in rax - syntax_tree.fUserValue += "call " + subText + "\n"; - } - } - - syntax_tree.fUserValue += nectar_generate_epilogue() + "ret\n"; - ++kOrigin; - } catch (...) { - syntax_tree.fUserValue += nectar_generate_epilogue() + "ret\n"; - ++kOrigin; - } - - if (kCurrentIfCondition) { - if (!kNasmOutput) - syntax_tree.fUserValue += - "public_segment .code64 __ret_" + kCurrentIfSymbol + "\nnop\n"; - else - syntax_tree.fUserValue += "__ret_" + kCurrentIfSymbol + ":\n"; - - kCurrentIfSymbol.clear(); - kCurrentIfCondition = false; - } - } - default: { - continue; - } - } - } - - return this->CompileLayout(text, file, syntax_tree); -} - -/// \brief Parse NECTAR Impls. -/// \param CompilerKit::SyntaxLeafList::SyntaxLeaf the leaf to build upon. 
-CompilerKit::SyntaxLeafList::SyntaxLeaf CompilerFrontendNectarAMD64::CompileLayout( - CompilerKit::STLString& text, const CompilerKit::STLString& file, - CompilerKit::SyntaxLeafList::SyntaxLeaf& syntax_tree) { - if ((text.find("impl") != CompilerKit::STLString::npos)) { - CompilerKit::STLString keyword = "impl"; - auto classPos = text.find(keyword) + keyword.length(); - auto bracePos = text.find("{"); - - auto className = text.substr(classPos, bracePos - classPos); - - // Trim whitespace - while (!className.empty() && (className.front() == ' ' || className.front() == '\t')) { - className.erase(0, 1); - } - while (!className.empty() && (className.back() == ' ' || className.back() == '\t')) { - className.pop_back(); - } - - if (!className.empty()) { - nectar_push_scope(ScopeKind::kScopeClass, className); - ++kOnClassScope; - } - - syntax_tree.fUserValue += ";; HINT: " + className + "\n"; - } - - // Handle class exit - if (text.find("};") != CompilerKit::STLString::npos) { - --kOnClassScope; - nectar_pop_scope(); - - syntax_tree.fUserValue += ";; HINT: END NAMESPACE\n"; - } - - return syntax_tree; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// HELPER FUNCTION IMPLEMENTATIONS - -///////////////////////////////////////////////////////////////////////////////////////// - -/// \brief Push a new scope onto the scope stack -static void nectar_push_scope(ScopeKind kind, const CompilerKit::STLString& name) { - CompilerScope scope; - scope.fKind = kind; - scope.fName = name; - - // Build mangled prefix based on current scope stack - for (const auto& s : kContext.fScopeStack) { - if (s.fKind == ScopeKind::kScopeNamespace) { - scope.fMangledPrefix += "N_" + s.fName; - } else if (s.fKind == ScopeKind::kScopeClass) { - scope.fMangledPrefix += "C_" + s.fName; - } - } - - kContext.fScopeStack.push_back(scope); -} - -/// \brief Pop the current scope from the scope stack -static void nectar_pop_scope() { - if 
(!kContext.fScopeStack.empty()) { - kContext.fScopeStack.pop_back(); - } -} - -/// \brief Extract function arguments from function declaration -static std::vector nectar_extract_function_args( - const CompilerKit::STLString& text) { - std::vector args; - - auto openParen = text.find("("); - auto closeParen = text.find(")"); - - if (openParen == CompilerKit::STLString::npos || closeParen == CompilerKit::STLString::npos || - closeParen <= openParen) { - return args; - } - - auto argsText = text.substr(openParen + 1, closeParen - openParen - 1); - - // Trim whitespace - while (!argsText.empty() && (argsText.front() == ' ' || argsText.front() == '\t')) { - argsText.erase(0, 1); - } - while (!argsText.empty() && (argsText.back() == ' ' || argsText.back() == '\t')) { - argsText.pop_back(); - } - - if (argsText.empty() || argsText == "void") { - return args; - } - - // Simple comma-separated parsing - std::size_t pos = 0; - while (pos < argsText.size()) { - auto commaPos = argsText.find(",", pos); - if (commaPos == CompilerKit::STLString::npos) { - commaPos = argsText.size(); - } - - auto arg = argsText.substr(pos, commaPos - pos); - - // Extract type name (skip variable name) - std::size_t lastSpace = arg.rfind(' '); - if (lastSpace != CompilerKit::STLString::npos) { - arg = arg.substr(0, lastSpace); - } - - // Trim - while (!arg.empty() && (arg.front() == ' ' || arg.front() == '\t')) { - arg.erase(0, 1); - } - while (!arg.empty() && (arg.back() == ' ' || arg.back() == '\t')) { - arg.pop_back(); - } - - if (!arg.empty()) { - args.push_back(arg); - } - - pos = commaPos + 1; - } - - return args; -} - -/// \brief Mangle a function or method name according to Nectar mangling scheme -static CompilerKit::STLString nectar_mangle_name(const CompilerKit::STLString& identifier, - const std::vector& args) { - CompilerKit::STLString mangled = "__NECTAR_"; - - // Add scope chain - for (const auto& scope : kContext.fScopeStack) { - if (scope.fKind == ScopeKind::kScopeNamespace) { - 
mangled += "N_" + scope.fName; - } - } - - // Check if we're in a class scope for member functions - bool inClass = false; - - for (const auto& scope : kContext.fScopeStack) { - if (scope.fKind == ScopeKind::kScopeClass) { - inClass = true; - break; - } - } - - CompilerKit::STLString identifierCopy = identifier; - - if (auto pos = identifierCopy.find("let "); pos != CompilerKit::STLString::npos) { - identifierCopy = identifierCopy.substr(pos + 3); - } else if (auto pos = identifierCopy.find("const "); pos != CompilerKit::STLString::npos) { - identifierCopy = identifierCopy.substr(pos + 5); - } - - while (auto pos = identifierCopy.find(" ")) { - if (pos == CompilerKit::STLString::npos) break; - identifierCopy.erase(pos, 1); - } - - if (inClass) { - mangled += "M_" + identifierCopy; - } else { - return identifierCopy; - } - - return mangled; -} - -/// \brief Generate function prologue -static CompilerKit::STLString nectar_generate_prologue() { - return "push rbp\nmov rbp, rsp\n"; -} - -/// \brief Generate function epilogue -static CompilerKit::STLString nectar_generate_epilogue() { - return "mov rsp, rbp\npop rbp\n"; -} - -/// \brief Allocate a variable on the stack -static Int32 nectar_allocate_stack_variable(const CompilerKit::STLString& var_name, Int32 size, - bool is_constant) { - kContext.fStackOffset -= size; - kContext.fMaxStackUsed = std::min(kContext.fStackOffset, kContext.fMaxStackUsed); - - if (auto var = nectar_find_variable(var_name); var) { - if (var->fIsConstant) - CompilerKit::Detail::print_error( - "Variable " + var_name.substr(var_name.find("const") + strlen("const")) + " is constant.", - "CompilerKit"); - - if (var->fStackOffset > 0) - CompilerKit::Detail::print_error("Variable " + var_name + " is already defined.", - "CompilerKit"); - - delete var; - } - - VariableInfo varInfo; - varInfo.fName = var_name; - varInfo.fLocation = VarLocation::kStack; - varInfo.fStackOffset = kContext.fStackOffset; - varInfo.fSize = size; - varInfo.fLastUsed = 
kContext.fInstructionCounter; - varInfo.fIsConstant = is_constant; - kContext.fVariables.push_back(varInfo); - - return kContext.fStackOffset; -} - -/// \brief Find a variable by name -static VariableInfo* nectar_find_variable(const CompilerKit::STLString& var_name) { - for (auto& var : kContext.fVariables) { - if (var.fName == var_name) { - return new VariableInfo(var); - } - } - return nullptr; -} - -/// \brief Get variable reference (register or stack location) -static CompilerKit::STLString nectar_get_variable_ref(const CompilerKit::STLString& var_name, - bool lookup) { - auto* varInfo = nectar_find_variable(var_name); - - if (!varInfo || var_name.empty() || !isnumber(var_name[0])) { - if (!isnumber(var_name[0]) && lookup) - CompilerKit::Detail::print_error("Variable " + var_name + " not found.", "CompilerKit"); - } - - if (!varInfo) { - return ""; - } - - if (varInfo->fIsConstant) { - CompilerKit::Detail::print_error("Invalid use of constant " + - var_name.substr(var_name.find("const") + strlen("const")) + - " as variable.", - "CompilerKit"); - return "call __abort"; - } - - varInfo->fLastUsed = kContext.fInstructionCounter; - - if (varInfo->fLocation == VarLocation::kRegister) { - auto reg = varInfo->fRegister; - delete varInfo; - return reg; - } else { - // Stack or spilled - auto reg = "qword [rbp+" + std::to_string(-varInfo->fStackOffset) + "]"; - delete varInfo; - return reg; - } - - return ""; -} - -/// \brief Allocate a register for a variable -static CompilerKit::STLString nectar_allocate_register(const CompilerKit::STLString& var_name) { - // Check if variable already has a register - VariableInfo* existing = nullptr; - - for (auto& var : kContext.fVariables) { - if (var.fName == var_name) { - existing = &var; - break; - } - } - - if (existing && existing->fLocation == VarLocation::kRegister) { - return existing->fRegister; - } - - // Find a free register - for (const auto& reg : kRegisterList) { - bool inUse = false; - for (const auto& var : 
kContext.fVariables) { - if (var.fLocation == VarLocation::kRegister && var.fRegister == reg) { - inUse = true; - break; - } - } - - if (!inUse) { - // Allocate this register - if (existing) { - if (existing->fIsConstant) { - CompilerKit::Detail::print_error("Invalid use of constant " + var_name + " as variable.", - "CompilerKit"); - return "__call __abort"; - } - - existing->fLocation = VarLocation::kRegister; - existing->fRegister = reg; - existing->fLastUsed = kContext.fInstructionCounter; - } else { - VariableInfo varInfo; - varInfo.fName = var_name; - varInfo.fLocation = VarLocation::kRegister; - varInfo.fRegister = reg; - varInfo.fLastUsed = kContext.fInstructionCounter; - varInfo.fIsConstant = existing->fIsConstant; - - kContext.fVariables.push_back(varInfo); - } - return reg; - } - } - - // No free register - return ""; -} - -/// \brief Spill the least recently used variable to stack -static CompilerKit::STLString nectar_spill_lru_variable() { - CompilerKit::STLString spillCode; - - // Find LRU variable in register (that's not a parameter) - VariableInfo* lruVar = nullptr; - UInt32 minLastUsed = UINT32_MAX; - - for (auto& var : kContext.fVariables) { - if (var.fLocation == VarLocation::kRegister && !var.fIsParameter && - var.fLastUsed < minLastUsed) { - lruVar = &var; - minLastUsed = var.fLastUsed; - } - } - - if (!lruVar) { - return ""; // No variable to spill - } - - // Allocate stack space - kContext.fStackOffset -= lruVar->fSize; - kContext.fMaxStackUsed = std::min(kContext.fStackOffset, kContext.fMaxStackUsed); - - // Generate spill code - - /// if impl init - if (!lruVar->fRegister.ends_with("{}")) - spillCode = "mov qword [rbp+" + std::to_string(-kContext.fStackOffset) + "], " + - lruVar->fRegister + "\n"; - else - spillCode = "mov qword [rbp+" + std::to_string(-kContext.fStackOffset) + "], rax\n"; - - // Update variable info - lruVar->fLocation = VarLocation::kStackSpill; - lruVar->fStackOffset = kContext.fStackOffset; - auto spilledReg = 
lruVar->fRegister; - lruVar->fRegister = ""; - - return spillCode; -} - -/// \brief Add a class member to the struct map -static void nectar_add_impl_member(const CompilerKit::STLString& class_name, - const CompilerKit::STLString& member_name, Int32 size) { - // Find or create struct map entry - CompilerStructMap* structMap = nullptr; - for (auto& sm : kContext.fStructMapVector) { - if (sm.fName == class_name) { - structMap = &sm; - break; - } - } - - if (!structMap) { - CompilerStructMap newMap; - newMap.fName = class_name; - kContext.fStructMapVector.push_back(newMap); - structMap = &kContext.fStructMapVector.back(); - } - - // Calculate offset - UInt32 offset = 0; - if (!structMap->fOffsets.empty()) { - offset = structMap->fOffsets.back().first + 8; // Assume 8-byte members for now - } - - structMap->fOffsets.emplace_back(offset, member_name); -} - -/// \brief Get the size of a class -static Int32 nectar_get_impl_size(const CompilerKit::STLString& class_name) { - for (const auto& sm : kContext.fStructMapVector) { - if (sm.fName == class_name) { - if (sm.fOffsets.empty()) { - return 0; - } - return sm.fOffsets.back().first + 8; // Last offset + size - } - } - return 0; -} - -/// \brief Generate constructor call -static CompilerKit::STLString nectar_generate_constructor_call( - const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name) { - auto size = nectar_get_impl_size(class_name); - auto offset = nectar_allocate_stack_variable( - obj_name, size == 0 ? 
8 : size, obj_name.find("_const_") != CompilerKit::STLString::npos); - - nectar_push_scope(ScopeKind::kScopeClass, class_name); - auto ctor_mangled = nectar_mangle_name(class_name); - nectar_pop_scope(); - - CompilerKit::STLString code; - code += "lea r8, [rbp+" + std::to_string(offset) + "]\n"; - code += "call " + ctor_mangled + "\n"; - return code; -} - -/// \brief Generate destructor call -static CompilerKit::STLString nectar_generate_destructor_call( - const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name) { - auto* varInfo = nectar_find_variable(obj_name); - - if (!varInfo) { - return ""; - } - - nectar_push_scope(ScopeKind::kScopeClass, class_name); - auto dtor_mangled = nectar_mangle_name("~" + class_name); - nectar_pop_scope(); - - CompilerKit::STLString code; - if (varInfo->fLocation == VarLocation::kStack || varInfo->fLocation == VarLocation::kStackSpill) { - code += "lea r8, [rbp+" + std::to_string(varInfo->fStackOffset) + "]\n"; - } else { - code += "mov r8, " + varInfo->fRegister + "\n"; - } - - delete varInfo; - - code += "call " + dtor_mangled + "\n"; - return code; -} - -/// \brief Process function parameters per PEF calling convention. -/// \note Assumes args are already extracted. 
-static void nectar_process_function_parameters(const std::vector& args) { - for (size_t i = 0; i < args.size() && i < 8; ++i) { - VariableInfo param; - param.fName = "arg" + std::to_string(i); - param.fLocation = VarLocation::kRegister; - param.fRegister = kRegisterConventionCallList[i]; - param.fIsParameter = true; - param.fTypeName = args[i]; - param.fLastUsed = kContext.fInstructionCounter; - kContext.fVariables.push_back(param); - } - - // Args beyond r15 go on stack - for (size_t i = 8; i < args.size(); ++i) { - Int32 offset = 16 + (i - 8) * 8; - VariableInfo param; - param.fName = "arg" + std::to_string(i); - param.fLocation = VarLocation::kStack; - param.fStackOffset = offset; // Positive (before rbp) - param.fIsParameter = true; - param.fTypeName = args[i]; - param.fLastUsed = kContext.fInstructionCounter; - kContext.fVariables.push_back(param); - } -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * @brief NECTAR assembler class. 
- */ - -///////////////////////////////////////////////////////////////////////////////////////// - -#define kExtListCxx {".nc", ".pp.nc"} - -class AssemblyNectarInterfaceAMD64 final CK_ASSEMBLY_INTERFACE { - public: - explicit AssemblyNectarInterfaceAMD64() = default; - ~AssemblyNectarInterfaceAMD64() override = default; - - NECTAR_COPY_DEFAULT(AssemblyNectarInterfaceAMD64); - - UInt32 Arch() noexcept override { return CompilerKit::AssemblyFactory::kArchAMD64; } - - Int32 CompileToFormat(CompilerKit::STLString src, Int32 arch) override { - if (kFrontend == nullptr) return EXIT_FAILURE; - - CompilerKit::STLString dest = src; - std::vector ext = kExtListCxx; - - dest.erase(dest.find(ext[0])); - - dest += ".masm"; - - std::ofstream out_fp(dest); - std::ifstream src_fp = std::ifstream(src); - - CompilerKit::STLString line_source; - - std::stringstream ss; - ss << std::hex << kOrigin; - - // Clear symbol tracking sets for this compilation unit - kDefinedSymbols.clear(); - kExternalSymbols.clear(); - - // First pass: compile all lines and collect symbols - CompilerKit::STLString compiledCode; - std::size_t lastRes{}; - std::string prevRes; - std::string nextRes; - - while (std::getline(src_fp, line_source)) { - auto res = kFrontend->Compile(line_source, src); - if (kAcceptableErrors > 0) return EXIT_FAILURE; - - if (res.fPlaceType == CompilerKit::SyntaxLeafList::SyntaxLeaf::kPlaceBefore) { - compiledCode.insert(compiledCode.find(prevRes), res.fUserValue, 0, res.fUserValue.size()); - } else if (res.fPlaceType == CompilerKit::SyntaxLeafList::SyntaxLeaf::kPlaceAfter) { - nextRes = res.fUserValue; - continue; - } else { - compiledCode += res.fUserValue; - if (!nextRes.empty()) { - compiledCode += nextRes; - nextRes.clear(); - } - } - - lastRes = res.fUserValue.size(); - prevRes = res.fUserValue; - } - - // Output header - if (!kNasmOutput) - out_fp << "%bits 64\n"; - else { - out_fp << "[bits 64]\n"; - out_fp << "extern __operator_new\nextern __operator_delete\n"; - } - - 
// For NASM output: emit extern declarations for undefined symbols - if (kNasmOutput) { - for (const auto& sym : kExternalSymbols) { - // Only declare as extern if not defined in this file - if (kDefinedSymbols.find(sym) == kDefinedSymbols.end() && !sym.empty()) { - out_fp << "extern " << sym << "\n"; - } - } - if (!kExternalSymbols.empty()) { - out_fp << "\n"; - } - } - - // Output compiled code - out_fp << compiledCode; - - return EXIT_SUCCESS; - } -}; - -///////////////////////////////////////////////////////////////////////////////////////// - -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(CompilerNectarAMD64) { - bool skip = false; - - kKeywords.emplace_back("impl", CompilerKit::KeywordKind::kKeywordKindImpl); - kKeywords.emplace_back("trait", CompilerKit::KeywordKind::kKeywordKindTrait); - kKeywords.emplace_back("{", CompilerKit::KeywordKind::kKeywordKindBodyStart); - kKeywords.emplace_back("}", CompilerKit::KeywordKind::kKeywordKindBodyEnd); - kKeywords.emplace_back("{}", CompilerKit::KeywordKind::kKeywordKindImplInit); - kKeywords.emplace_back("(", CompilerKit::KeywordKind::kKeywordKindFunctionStart); - kKeywords.emplace_back(")", CompilerKit::KeywordKind::kKeywordKindFunctionEnd); - kKeywords.emplace_back(":=", CompilerKit::KeywordKind::kKeywordKindVariableAssign); - kKeywords.emplace_back("+=", CompilerKit::KeywordKind::kKeywordKindVariableInc); - kKeywords.emplace_back("-=", CompilerKit::KeywordKind::kKeywordKindVariableDec); - kKeywords.emplace_back("const", CompilerKit::KeywordKind::kKeywordKindVariable); - kKeywords.emplace_back("let", CompilerKit::KeywordKind::kKeywordKindVariable); - kKeywords.emplace_back("new", CompilerKit::KeywordKind::kKeywordKindNew); - kKeywords.emplace_back("delete", CompilerKit::KeywordKind::kKeywordKindDelete); - kKeywords.emplace_back(".", CompilerKit::KeywordKind::kKeywordKindAccess); - kKeywords.emplace_back("->", 
CompilerKit::KeywordKind::kKeywordKindAccessChecked); - kKeywords.emplace_back("(", CompilerKit::KeywordKind::kKeywordKindFunctionAccess); - kKeywords.emplace_back(";", CompilerKit::KeywordKind::kKeywordKindEndLine); - kKeywords.emplace_back("return", CompilerKit::KeywordKind::kKeywordKindReturn); - kKeywords.emplace_back("extern", CompilerKit::KeywordKind::kKeywordKindExtern); - kKeywords.emplace_back("import", CompilerKit::KeywordKind::kKeywordKindImport); - kKeywords.emplace_back("export", CompilerKit::KeywordKind::kKeywordKindExport); - - kKeywords.emplace_back("if", CompilerKit::KeywordKind::kKeywordKindIf); - - kErrorLimit = 0; - - kFrontend = new CompilerFrontendNectarAMD64(); - - CompilerKit::StrongRef mntPnt{new AssemblyNectarInterfaceAMD64()}; - kAssembler.Mount({mntPnt.Leak()}); - - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - // Ensure cleanup on exit - std::atexit([]() { - delete kFrontend; - kFrontend = nullptr; - }); - - for (auto index = 1UL; index < argc; ++index) { - if (!argv[index]) break; - - if (argv[index][0] == '-') { - if (skip) { - skip = false; - continue; - } - - if (strcmp(argv[index], "-fverbose") == 0) { - kVerbose = true; - continue; - } - - if (strcmp(argv[index], "-fuse-masm") == 0) { - kNasmOutput = false; - continue; - } - - if (strcmp(argv[index], "-fuse-nasm") == 0) { - kNasmOutput = true; - continue; - } - - if (strcmp(argv[index], "-fprint-dialect") == 0) { - if (kFrontend) std::cout << kFrontend->Language() << "\n"; - - return NECTAR_SUCCESS; - } - - CompilerKit::STLString err = "Unknown option: "; - err += argv[index]; - - CompilerKit::Detail::print_error(err, "Nectar"); - - continue; - } - - CompilerKit::STLString argv_i = argv[index]; - - std::vector exts = kExtListCxx; - - for (CompilerKit::STLString ext : exts) { - if (argv_i.ends_with(ext)) { - if (kAssembler.Compile(argv_i, kMachine) != EXIT_SUCCESS) { - return NECTAR_INVALID_DATA; - } - - break; - } - } - } - - 
kAssembler.Unmount(); - - return NECTAR_SUCCESS; -} - -// -// Last rev 25-8-7 -// diff --git a/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cpp b/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cpp new file mode 100644 index 0000000..5b246f2 --- /dev/null +++ b/src/CompilerKit/src/Compilers/NectarCompiler+AMD64.cpp @@ -0,0 +1,1749 @@ +// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// BUGS: 0 + +/////////////////////// + +// ANSI ESCAPE CODES // + +/////////////////////// + +/////////////////////// + +// MACROS // + +/////////////////////// + +#include +#include +#include +#include +#include + +/* NeKernel NECTAR Compiler Driver. */ +/* This is part of the CompilerKit. */ +/* (c) Amlal El Mahrouss 2024-2025 */ + +/// @author Amlal El Mahrouss (amlal@nekernel.org) +/// @file NectarCompiler+AMD64.cc +/// @brief NECTAR Compiler Driver. + +///////////////////////////////////// + +// INTERNALS OF THE NECTAR COMPILER + +///////////////////////////////////// + +/// @CompilerKit +/// @brief Avoids relative_path which could discard parts of the original. +std::filesystem::path nectar_expand_home(const std::filesystem::path& input) { + const std::string& raw = input.string(); + + if (!raw.empty() && raw[0] == '~') { + const char* home = std::getenv("HOME"); + if (!home) home = std::getenv("USERPROFILE"); + + if (!home) throw std::runtime_error("Home directory not found in environment variables"); + + return std::filesystem::path(home) / raw.substr(1); + } + + return input; +} + +/// \brief Register map, i.e ({foobar, rbp+48}, etc...) +struct CompilerRegisterMap final { + CompilerKit::STLString fName{}; + CompilerKit::STLString fReg{}; +}; + +/// \brief Offsets of struct and classes. 
+struct CompilerStructMap final { + CompilerKit::STLString fName{}; + CompilerKit::STLString fReg{}; + std::vector> fOffsets; +}; + +/// \brief State machine of the compiler. +struct CompilerState final { + std::vector fStackMapVector; + std::vector fStructMapVector; + CompilerKit::STLString fLastFile{}; + CompilerKit::STLString fLastError{}; +}; + +static CompilerState kState; + +/// \brief Embed Scope of a class. +static Int32 kOnClassScope = 0; + +///////////////////////////////////////////////////////////////////////////////////////// + +// NEW DATA STRUCTURES FOR NECTAR SUPPORT + +///////////////////////////////////////////////////////////////////////////////////////// + +/// \brief Scope kind enumeration +enum class ScopeKind { + kScopeGlobal, + kScopeNamespace, + kScopeClass, + kScopeFunction, +}; + +/// \brief Variable location enumeration +enum class VarLocation { + kRegister, + kStack, + kStackSpill, +}; + +/// \brief Compiler scope information +struct CompilerScope { + ScopeKind fKind{ScopeKind::kScopeGlobal}; + CompilerKit::STLString fName{}; + CompilerKit::STLString fMangledPrefix{}; +}; + +/// \brief Extended variable information +struct VariableInfo { + CompilerKit::STLString fName{}; + VarLocation fLocation{VarLocation::kRegister}; + Int32 fStackOffset{0}; + CompilerKit::STLString fRegister{}; + Int32 fSize{8}; + bool fIsParameter{false}; + CompilerKit::STLString fTypeName{}; + UInt32 fLastUsed{0}; + bool fIsConstant{false}; +}; + +/// \brief Complete compiler context +struct CompilerContext { + std::vector fScopeStack; + std::vector fVariables; + std::vector fStructMapVector; + CompilerKit::STLString fLastFile{}; + CompilerKit::STLString fLastError{}; + Int32 fStackOffset{0}; + Int32 fMaxStackUsed{0}; + UInt32 fInstructionCounter{0}; +}; + +/// \brief Global compiler context (replaces kState) +static CompilerContext kContext; + +///////////////////////////////////////////////////////////////////////////////////////// + +/// \brief Target 
/// architecture.
/// \note This shall never change.
static Int32 kMachine = CompilerKit::AssemblyFactory::kArchAMD64;

/////////////////////////////////////////

// KEYWORD TABLE (each entry is read through fKeywordName / fKeywordKind by Compile)

/////////////////////////////////////////

// NOTE(review): several declarations in this region read `std::vector` / `std::set` with no
// element type; the template arguments look like they were lost in extraction -- confirm against
// the original source before building.
static std::vector kKeywords;

/////////////////////////////////////////

// COMPILER PARSING UTILITIES/STATES.

/////////////////////////////////////////

static CompilerKit::AssemblyFactory kAssembler;
static bool kInStruct = false;
static bool kOnWhileLoop = false;
static bool kOnForLoop = false;
static bool kInBraces = false;
static size_t kBracesCount = 0UL;

/// \brief NASM output support: track defined and external symbols
static std::set kDefinedSymbols;
static std::set kExternalSymbols;

/////////////////////////////////////////////////////////////////////////////////////////

// HELPER FUNCTION DECLARATIONS

/////////////////////////////////////////////////////////////////////////////////////////

// Scope management
static void nectar_push_scope(ScopeKind kind, const CompilerKit::STLString& name);
static void nectar_pop_scope();

// Name mangling
static std::vector nectar_extract_function_args(
    const CompilerKit::STLString& text);
static CompilerKit::STLString nectar_mangle_name(
    const CompilerKit::STLString& identifier, const std::vector& args = {});

// Stack frame management
static CompilerKit::STLString nectar_generate_prologue();
static CompilerKit::STLString nectar_generate_epilogue();
static Int32 nectar_allocate_stack_variable(const CompilerKit::STLString& var_name, Int32 size = 8,
                                            bool is_constant = false);

// Register allocation
static CompilerKit::STLString nectar_allocate_register(const CompilerKit::STLString& var_name);
static CompilerKit::STLString nectar_spill_lru_variable();
// The returned pointer is heap-allocated by the implementation (`new VariableInfo`); every caller
// in this file releases it with `delete`.
static VariableInfo* nectar_find_variable(const CompilerKit::STLString& var_name);
static CompilerKit::STLString nectar_get_variable_ref(const CompilerKit::STLString& var_name,
                                                      bool lookup = false);

// Impl management
static void nectar_add_impl_member(const CompilerKit::STLString& class_name,
                                   const CompilerKit::STLString& member_name, Int32 size);
static Int32 nectar_get_impl_size(const CompilerKit::STLString& class_name);
static CompilerKit::STLString nectar_generate_constructor_call(
    const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name);
static CompilerKit::STLString nectar_generate_destructor_call(
    const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name);

// PEF calling convention
static void nectar_process_function_parameters(const std::vector& args);

/////////////////////////////////////////////////////////////////////////////////////////

/* \brief NECTAR compiler backend for the NeKernel NECTAR driver */
class CompilerFrontendNectarAMD64 final CK_COMPILER_FRONTEND {
 public:
  explicit CompilerFrontendNectarAMD64() = default;
  ~CompilerFrontendNectarAMD64() override = default;

  NECTAR_COPY_DEFAULT(CompilerFrontendNectarAMD64);

  /// \brief Parse Nectar symbols and syntax.
  /// \param text the source text to translate; taken by non-const reference.
  /// \param file name forwarded to the diagnostic helpers.
  /// \return the syntax leaf holding the generated assembly text.
  CompilerKit::SyntaxLeafList::SyntaxLeaf Compile(CompilerKit::STLString& text,
                                                  const CompilerKit::STLString& file) override;

  /// \brief Returns the language name.
  /// \return Language name.
  const char* Language() override;

 public:
  /// \brief Parse NECTAR namespaces and Impls.
  /// \param text the source text to inspect.
  /// \param file name of the file being compiled.
  /// \param CompilerKit::SyntaxLeafList::SyntaxLeaf the leaf to build upon.
  /// \return the leaf that was passed in, after hints have been appended to it.
  CompilerKit::SyntaxLeafList::SyntaxLeaf CompileLayout(CompilerKit::STLString& text,
                                                        const CompilerKit::STLString& file,
                                                        CompilerKit::SyntaxLeafList::SyntaxLeaf&);
};

/// @internal compiler variables

static CompilerFrontendNectarAMD64* kFrontend = nullptr;

/// Registers that nectar_allocate_register hands out, tried in this order.
static std::vector kRegisterList = {
    "rbx", "rsi", "r10", "r11", "r12", "r13", "r14", "r15", "xmm12", "xmm13", "xmm14", "xmm15",
};

/// @brief The PEF calling convention (caller must save rax, rbp)
/// @note callee must return via **rax**.
/// @note caller must read **rax** to grab return value.
static std::vector kRegisterConventionCallList = {
    "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};

static std::size_t kFunctionEmbedLevel{};
static CompilerKit::STLString kCurrentIfSymbol{};
static CompilerKit::STLString kCurrentReturnAddress{};
static bool kCurrentIfCondition{false};

/// detail namespaces

/// @brief Returns the human-readable name of the language handled by this frontend.
const char* CompilerFrontendNectarAMD64::Language() {
  return "NeKernel Nectar";
}

/// Counter used below to build unique `__ret_` labels and to tag entries of kOriginMap.
static std::uintptr_t kOrigin = kPefBaseOrigin;
// NOTE(review): the element type of kOriginMap appears to have been stripped in extraction
// (`std::vector>`); Compile stores {mangled_name, kOrigin} pairs in it -- confirm.
static std::vector> kOriginMap;

/////////////////////////////////////////////////////////////////////////////////////////

/// @name Compile
/// @brief Generate assembly from a NECTAR source.

/////////////////////////////////////////////////////////////////////////////////////////

/// @brief Looks up the struct map registered under `class_name`.
/// @param class_name name compared against each entry's fName.
/// @param member_name not used by the current implementation.
/// @return a copy of the matching entry, or a default-constructed CompilerStructMap if none
/// matches.
static auto nectar_get_impl_member(const CompilerKit::STLString& class_name,
                                   const CompilerKit::STLString& member_name) {
  // Find the struct map entry (nothing is created here; a miss yields an empty map).
  for (auto& sm : kContext.fStructMapVector) {
    if (sm.fName == class_name) {
      return sm;
    }
  }

  return CompilerStructMap{};
}

/// @brief Translates one piece of Nectar source text into assembly text.
///
/// Works in two passes. The first pass collects every entry of kKeywords whose name occurs in
/// `text`. The second pass switches on the kind of each collected keyword and appends the
/// generated assembly to `syntax_tree.fUserValue`. The result is finally handed to
/// CompileLayout().
///
/// @param text source text; the function-start case may erase a prefix of it.
/// @param file file name forwarded to print_error / print_warning.
/// @return the value returned by CompileLayout(text, file, syntax_tree); an empty leaf when
/// `text` is empty.
CompilerKit::SyntaxLeafList::SyntaxLeaf CompilerFrontendNectarAMD64::Compile(
    CompilerKit::STLString& text, const CompilerKit::STLString& file) {
  CompilerKit::SyntaxLeafList::SyntaxLeaf syntax_tree;
  // Holds a deferred "store rax" instruction produced by an assignment from a call (NASM mode);
  // it is appended after the call emitted by the access case.
  CompilerKit::STLString syntax_rem_buffer;

  if (text.empty()) return syntax_tree;

  // `index` counts the keywords accepted so far; it is stored next to each accepted keyword.
  std::size_t index{};
  std::vector> keywords_list;

  // Pass 1: collect the keywords present in `text`.
  for (auto& keyword : kKeywords) {
    if (text.find(keyword.fKeywordName) != std::string::npos) {
      switch (keyword.fKeywordKind) {
        default:
          break;
      }

      std::size_t pos = text.find(keyword.fKeywordName);
      if (pos == std::string::npos) continue;

      // An assign keyword directly preceded by '+' is skipped here.
      // can't go before start of string
      if (pos > 0 && text[pos - 1] == '+' &&
          keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign)
        continue;

      // Same for an assign keyword directly preceded by '-'.
      if (pos > 0 && text[pos - 1] == '-' &&
          keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign)
        continue;

      // An assign keyword immediately followed by '=' is skipped as well.
      // don't go out of range
      if ((pos + keyword.fKeywordName.size()) < text.size() &&
          text[pos + keyword.fKeywordName.size()] == '=' &&
          keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableAssign)
        continue;

      keywords_list.emplace_back(std::make_pair(keyword, index));
      ++index;
    }
  }

  // Pass 2: generate code for each collected keyword.
  for (auto& keyword : keywords_list) {
    if (text.find(keyword.first.fKeywordName) == CompilerKit::STLString::npos) continue;

    switch (keyword.first.fKeywordKind) {
      case CompilerKit::KeywordKind::kKeywordKindImpl: {
        ++kOnClassScope;
        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindIf: {
        // The flag is consumed by the return case, which emits the matching `__ret_` label.
        kCurrentIfCondition = true;

        std::size_t keywordPos = text.find(keyword.first.fKeywordName);
        std::size_t openParen = text.find("(");
        std::size_t closeParen = text.find("):");

        if (keywordPos == CompilerKit::STLString::npos ||
            openParen == CompilerKit::STLString::npos ||
            closeParen == CompilerKit::STLString::npos || closeParen <= openParen) {
          CompilerKit::Detail::print_error("Malformed if expression: " + text, file);
          break;
        }

        // Text between "(" and "):" with all spaces removed.
        auto left = text.substr(openParen + 1, closeParen - openParen - 1);

        while (left.find(" ") != CompilerKit::STLString::npos) {
          left.erase(left.find(" "), 1);
        }

        // Comparison token -> jump mnemonic emitted after `cmp rdi, rsi`.
        std::vector> operators = {
            {"=:", "jne"},
            {"!=:", "je"},
            {">:", "jl"},
            {"<:", "jg"},
        };

        for (auto& op : operators) {
          if (left.find(op.first) == CompilerKit::STLString::npos) continue;

          auto right = left.substr(left.find(op.first) + op.first.size());

          if (auto res = right.find(":"); res != CompilerKit::STLString::npos) right.erase(res);

          auto tmp = left.substr(0, left.find(op.first));

          while (tmp.find(" ") != CompilerKit::STLString::npos) tmp.erase(tmp.find(" "), 1);

          while (right.find(" ") != CompilerKit::STLString::npos) right.erase(right.find(" "), 1);

          // Left operand goes to rdi: from its stack slot if it is a known variable, otherwise
          // the text is emitted as-is.
          if (auto var = nectar_find_variable(tmp); var) {
            syntax_tree.fUserValue +=
                "mov rdi, qword [rbp+" + std::to_string(-var->fStackOffset) + "]\n";
            delete var;
          } else {
            if (!isnumber(tmp[0])) {
              CompilerKit::Detail::print_warning("Variable not found, treating as symbol: " + tmp,
                                                 file);
            }

            syntax_tree.fUserValue += "mov rdi, " + tmp + "\n";
          }

          // Right operand goes to rsi, same rules.
          if (auto var = nectar_find_variable(right); var) {
            syntax_tree.fUserValue +=
                "mov rsi, qword [rbp+" + std::to_string(-var->fStackOffset) + "]\n";
            delete var;
          }

          else {
            if (!isnumber(right[0])) {
              CompilerKit::Detail::print_warning("Variable not found, treating as symbol: " + right,
                                                 file);
            }

            syntax_tree.fUserValue += "mov rsi, " + right + "\n";
          }

          syntax_tree.fUserValue += "cmp rdi, rsi\n";

          // Jump target is `__ret_<kOrigin>_<previous symbol>`; the same suffix is remembered in
          // kCurrentIfSymbol so the return case can define the label.
          syntax_tree.fUserValue +=
              op.second + " __ret_" + std::to_string(kOrigin) + "_" + kCurrentIfSymbol + "\n";

          kCurrentIfSymbol = std::to_string(kOrigin) + "_" + kCurrentIfSymbol;

          ++kOrigin;
        }

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindImplInit: {
        if (text.find(":= ") == CompilerKit::STLString::npos)
          CompilerKit::Detail::print_error("Invalid invokation of Init.", file);

        auto res = text.substr(text.find(":= ") + strlen(":= "));

        // `tmp` is tested for non-zero: "{}" at index 0 skips the erase, and a missing "{}"
        // (npos) leaves the case without emitting a call.
        if (auto tmp = res.find("{}"); tmp) {
          if (tmp == CompilerKit::STLString::npos) {
            break;
          }

          res.erase(tmp);
        }

        syntax_tree.fUserValue += "call __NECTAR_M_" + res + "\n";
        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindFunctionStart: {
        // Any digit anywhere in the line makes this case bail out.
        for (auto& ch : text) {
          if (isnumber(ch)) {
            goto dont_accept_func;
          }
        }

        goto accept_func;

      dont_accept_func:
        break;

      accept_func: {
        if (kFunctionEmbedLevel > 0)
          CompilerKit::Detail::print_error("Clojures are a work in progress feature.", file);

        CompilerKit::STLString symbol_name_fn = text;
        size_t indexFnName = 0;

        // Skip past the first space or tab.
        // this one is for the type.
        for (auto& ch : text) {
          ++indexFnName;

          if (ch == '\t') break;
          if (ch == ' ') break;
        }

        symbol_name_fn = text.substr(indexFnName);

        // A line containing "return " is not a definition: drop what precedes it and stop.
        if (text.find("return ") != CompilerKit::STLString::npos) {
          text.erase(0, text.find("return "));
          break;
        }

        // Check if this is a function call (ends with ;)
        if (text.ends_with(");")) {
          // Handle function call/jump
          auto it = std::find_if(
              kOriginMap.begin(), kOriginMap.end(),
              [&symbol_name_fn](std::pair pair) -> bool {
                return symbol_name_fn.find(pair.first) != CompilerKit::STLString::npos;
              });

          if (it != kOriginMap.end()) {
            // The recorded origin is emitted in hexadecimal as the jump target.
            std::stringstream ss;
            ss << std::hex << it->second;

            syntax_tree.fUserValue += "jmp " + ss.str() + "\n";
          }
          break;
        }

        indexFnName = 0;

        // Extract clean function name
        CompilerKit::STLString cleanFnName = symbol_name_fn;

        if (cleanFnName.find("(") != CompilerKit::STLString::npos) {
          cleanFnName = cleanFnName.substr(0, cleanFnName.find("("));
        }

        // Remove whitespace/tabs
        while (!cleanFnName.empty() && (cleanFnName.back() == ' ' || cleanFnName.back() == '\t')) {
          cleanFnName.pop_back();
        }
        while (!cleanFnName.empty() &&
               (cleanFnName.front() == ' ' || cleanFnName.front() == '\t')) {
          cleanFnName.erase(0, 1);
        }

        // Extract function arguments
        auto args = nectar_extract_function_args(text);

        // Generate mangled name
        auto mangled_name = nectar_mangle_name(cleanFnName, args);

        // Generate function label and prologue

        while (mangled_name.find(" ") != CompilerKit::STLString::npos) {
          mangled_name.erase(mangled_name.find(" "), 1);
        }

        // Track defined symbol for NASM extern resolution
        kDefinedSymbols.insert(mangled_name);

        // Names that are not already mangled get a leading underscore. Note the symbol was
        // recorded in kDefinedSymbols just above, i.e. without that underscore.
        if (mangled_name.starts_with("__NECTAR") == false) {
          mangled_name = "_" + mangled_name;
        }

        if (!kNasmOutput)
          syntax_tree.fUserValue += "public_segment .code64 " + mangled_name + "\n";
        else
          syntax_tree.fUserValue +=
              "section .text\nglobal " + mangled_name + "\n" + mangled_name + ":\n";

        syntax_tree.fUserValue += nectar_generate_prologue();

        // Initialize function-local state
        kContext.fVariables.clear();
        kContext.fStackOffset = 0;
        kContext.fMaxStackUsed = 0;

        // Process function parameters
        nectar_process_function_parameters(args);

        // Push function scope
        nectar_push_scope(ScopeKind::kScopeFunction, cleanFnName);

        ++kFunctionEmbedLevel;

        kOriginMap.push_back({mangled_name, kOrigin});
        ++kOrigin;

        break;
      }
      }
      case CompilerKit::KeywordKind::kKeywordKindFunctionEnd: {
        if (kOnClassScope) --kOnClassScope;

        // A line ending with ';' leaves the function scope untouched.
        if (text.ends_with(";")) break;

        if (kFunctionEmbedLevel) {
          --kFunctionEmbedLevel;
        }

        // Pop function scope
        nectar_pop_scope();

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindDelete: {
        // Outside NASM mode, the first occurrence of the keyword in the text generated so far is
        // replaced by the runtime symbol.
        if (auto pos = syntax_tree.fUserValue.find(keyword.first.fKeywordName);
            pos != CompilerKit::STLString::npos)
          if (!kNasmOutput) {
            syntax_tree.fUserValue.replace(pos, keyword.first.fKeywordName.size(),
                                           "__operator_delete");
          }
        continue;
      }
      case CompilerKit::KeywordKind::kKeywordKindNew: {
        // Same rewrite as for delete, with `__operator_new`.
        if (auto pos = syntax_tree.fUserValue.find(keyword.first.fKeywordName);
            pos != CompilerKit::STLString::npos) {
          if (!kNasmOutput) {
            syntax_tree.fUserValue.replace(pos, keyword.first.fKeywordName.size(),
                                           "__operator_new");
          }
        }

        continue;
      }
      case CompilerKit::KeywordKind::kKeywordKindAccess:
      case CompilerKit::KeywordKind::kKeywordKindFunctionAccess:
      case CompilerKit::KeywordKind::kKeywordKindAccessChecked: {
        // Lines containing any of the following are left to the other cases.
        if (text.find("return ") != CompilerKit::STLString::npos) {
          break;
        }

        if (text.find("if ") != CompilerKit::STLString::npos) {
          break;
        }

        if (text.find("const ") != CompilerKit::STLString::npos) {
          break;
        }

        if (text.find("let ") != CompilerKit::STLString::npos) {
          break;
        }

        if (text.find("):") != CompilerKit::STLString::npos) {
          break;
        }

        // Everything after the access keyword.
        CompilerKit::STLString valueOfVar =
            text.substr(text.find(keyword.first.fKeywordName) +
                        keyword.first.fKeywordName.size());

        CompilerKit::STLString args;

        if (valueOfVar.find("{") != CompilerKit::STLString::npos) {
          break;
        }

        if (CompilerKit::KeywordKind::kKeywordKindFunctionAccess == keyword.first.fKeywordKind)
          args = text.substr(text.find(keyword.first.fKeywordName));
        else
          args = valueOfVar.substr(valueOfVar.find("(") + 1);

        // Everything before the access keyword, stripped of spaces and tabs.
        auto nameVar = text.substr(0, text.find(keyword.first.fKeywordName));

        while (nameVar.find(" ") != CompilerKit::STLString::npos) {
          nameVar.erase(nameVar.find(" "), 1);
        }

        while (nameVar.find("\t") != CompilerKit::STLString::npos) {
          nameVar.erase(nameVar.find("\t"), 1);
        }

        auto method = text.substr(0, text.find(keyword.first.fKeywordName));

        if (method.find("let ") != CompilerKit::STLString::npos) {
          method.erase(0, method.find("let ") + strlen("let "));
        } else if (method.find("const ") != CompilerKit::STLString::npos) {
          method.erase(0, method.find("const ") + strlen("const "));
        }

        if (method.find(":=") != CompilerKit::STLString::npos) {
          method.erase(0, method.find(":=") + strlen(":="));
        }

        while (method.find(" ") != CompilerKit::STLString::npos) {
          method.erase(method.find(" "), 1);
        }

        // When the receiver is a known variable its address is loaded into r8.
        if (!nectar_get_variable_ref(nameVar).empty())
          syntax_tree.fUserValue += "lea r8, " + nectar_get_variable_ref(nameVar) + "\n";

        if (CompilerKit::KeywordKind::kKeywordKindFunctionAccess != keyword.first.fKeywordKind)
          method = valueOfVar.erase(valueOfVar.find("("));

        valueOfVar += "\n";

        CompilerKit::STLString arg;
        // Argument registers are numbered from r9 up to r15.
        auto index = 9;
        auto cnter = 0;

        CompilerKit::STLString buf;

        // Split `args` on ',' and ')' and emit one `mov rN, value` per non-empty argument.
        for (auto& ch : args) {
          if (ch == ',' || ch == ')') {
            if (index <= 15) {
              auto val = nectar_get_variable_ref(arg);

              if (val.empty()) {
                val = arg;

                while (val.find(" ") != CompilerKit::STLString::npos) {
                  val.erase(val.find(" "), 1);
                }

                // Unknown, non-numeric arguments are replaced by a literal zero.
                if (!isnumber(val[0])) {
                  val = "0x0";
                }
              }

              if (!arg.empty()) buf += "mov r" + std::to_string(index) + ", " + val + "\n";

              arg.clear();
              ++index;
              ++cnter;
            }

            continue;
          }

          arg += ch;
        }

        if (!nectar_get_variable_ref(nameVar).empty()) {
          if (!kNasmOutput) {
            syntax_tree.fUserValue += buf;
            syntax_tree.fUserValue += "call ";
            syntax_tree.fUserValue +=
                (keyword.first.fKeywordName.ends_with('>') ? " __ptr __offset " : " __offset ") +
                nectar_get_variable_ref(nameVar) + method + "\n";
          } else {
            // NOTE(review): `buf` (the argument moves) is not emitted on this NASM path --
            // confirm whether that is intended.
            // NASM: Generate standard call through computed address
            auto varRef = nectar_get_variable_ref(nameVar);

            if (keyword.first.fKeywordName.ends_with('>')) {
              // Pointer dereference: load pointer then call through it
              syntax_tree.fUserValue += "mov rax, " + varRef + "\n";
              syntax_tree.fUserValue += "call [rax + " + method + "]\n";
            } else {
              // Direct offset call
              syntax_tree.fUserValue += "lea rax, " + varRef + "\n";
              syntax_tree.fUserValue += "call [rax + " + method + "]\n";
            }
          }
        } else {
          // Receiver is not a known variable: emit a plain call, prefixing '_' unless the name
          // is already mangled, followed by any deferred store.
          auto res = buf;
          if (method.starts_with("__NECTAR") == false)
            res += "call _" + method + "\n";
          else
            res += "call " + method + "\n";

          res += syntax_rem_buffer;

          syntax_tree.fUserValue += res;
        }

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindEndLine:
      case CompilerKit::KeywordKind::kKeywordKindVariableInc:
      case CompilerKit::KeywordKind::kKeywordKindVariableDec:
      case CompilerKit::KeywordKind::kKeywordKindVariableAssign: {
        CompilerKit::STLString valueOfVar = "";

        // Right-hand side: the text following the operator. An end-of-line keyword leaves the
        // case immediately.
        if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) {
          valueOfVar = text.substr(text.find("+=") + 2);
        } else if (keyword.first.fKeywordKind ==
                   CompilerKit::KeywordKind::kKeywordKindVariableDec) {
          valueOfVar = text.substr(text.find("-=") + 2);
        } else if (keyword.first.fKeywordKind ==
                   CompilerKit::KeywordKind::kKeywordKindVariableAssign) {
          valueOfVar = text.substr(text.find(keyword.first.fKeywordName) +
                                   keyword.first.fKeywordName.size());
        } else if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) {
          break;
        }

        if (valueOfVar.empty()) {
          CompilerKit::Detail::print_error("Undefined Right-Value for variable", file);
        }

        // Cut the right-hand side at the first ';'.
        while (valueOfVar.find(";") != CompilerKit::STLString::npos &&
               keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindEndLine) {
          valueOfVar.erase(valueOfVar.find(";"));
        }

        // Left-hand side: the text preceding the operator.
        CompilerKit::STLString varName = text;

        if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) {
          varName.erase(varName.find("+="));
        } else if (keyword.first.fKeywordKind ==
                   CompilerKit::KeywordKind::kKeywordKindVariableDec) {
          varName.erase(varName.find("-="));
        } else if (keyword.first.fKeywordKind ==
                   CompilerKit::KeywordKind::kKeywordKindVariableAssign) {
          varName.erase(varName.find(keyword.first.fKeywordName));
        } else if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) {
          varName.erase(varName.find(";"));
        }

        // NOTE(review): function-local static, so the value persists across calls to Compile --
        // confirm this is intended.
        static bool typeFound = false;

        for (auto& keyword : kKeywords) {
          if (keyword.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariable) {
            if (text.find(keyword.fKeywordName) != CompilerKit::STLString::npos) {
              if (text[text.find(keyword.fKeywordName)] == ' ') {
                typeFound = false;
                continue;
              }

              typeFound = true;
            }
          }
        }

        CompilerKit::STLString instr = "mov ";

        std::vector newVars;

        if (typeFound &&
            keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindVariableInc &&
            keyword.first.fKeywordKind != CompilerKit::KeywordKind::kKeywordKindVariableDec) {
          // Remove whitespace only (keep operators and quotes)
          while (!valueOfVar.empty() && (valueOfVar[0] == ' ' || valueOfVar[0] == '\t')) {
            valueOfVar.erase(0, 1);
          }
        }

        // NOTE(review): keyword.second is the position assigned while building keywords_list,
        // yet it indexes kKeywords here -- confirm the two orderings are meant to line up.
        if (keyword.second > 0 && kKeywords[keyword.second - 1].fKeywordKind ==
                                      CompilerKit::KeywordKind::kKeywordKindVariable) {
          syntax_tree.fUserValue += "\n";
          continue;
        }

        if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindEndLine) {
          syntax_tree.fUserValue += "\n";
          continue;
        }

        // `+=` becomes add, `-=` becomes sub, plain assignment stays mov.
        if (keyword.first.fKeywordKind == CompilerKit::KeywordKind::kKeywordKindVariableInc) {
          instr = "add ";
        } else if (keyword.first.fKeywordKind ==
                   CompilerKit::KeywordKind::kKeywordKindVariableDec) {
          instr = "sub ";
        }

        CompilerKit::STLString varErrCpy = varName;

        std::size_t indxReg = 0UL;

        // Strip all spaces and tabs from the right-hand side.
        while (!valueOfVar.empty() && (valueOfVar[0] == ' ' || valueOfVar[0] == '\t')) {
          valueOfVar.erase(0, 1);
        }

        while (valueOfVar.find(" ") != CompilerKit::STLString::npos) {
          valueOfVar.erase(valueOfVar.find(" "), 1);
        }

        while (valueOfVar.find("\t") != CompilerKit::STLString::npos) {
          valueOfVar.erase(valueOfVar.find("\t"), 1);
        }

        auto pos = 0;

        // Drop a leading "let " and all remaining blanks from the variable name.
        if (varName.find("let ") != CompilerKit::STLString::npos) {
          pos = varName.find("let ");
          varName = varName.substr(pos + std::string{"let "}.size());
        }

        while (varName.find(" ") != CompilerKit::STLString::npos) {
          varName.erase(varName.find(" "), 1);
        }

        while (varName.find("\t") != CompilerKit::STLString::npos) {
          varName.erase(varName.find("\t"), 1);
        }

        // A stack slot is reserved on every pass through here; the variable is flagged constant
        // when the line contains "const ".
        nectar_allocate_stack_variable(varName, 8,
                                       text.find("const ") != CompilerKit::STLString::npos);

        CompilerKit::STLString mangled;

        if (valueOfVar.find(".") != CompilerKit::STLString::npos) {
          if (!kNasmOutput) {
            CompilerKit::STLString value = "__offset ";
            valueOfVar.erase(0, valueOfVar.find(".") + strlen("."));
            // NOTE(review): if STLString is std::string, this is insert(pos, str, subpos) with
            // subpos == value.size(), which inserts an empty substring -- confirm intent.
            valueOfVar.insert(0, value, value.size());
          } else {
            valueOfVar.erase(0, valueOfVar.find(".") + strlen("."));
          }

          mangled = "__NECTAR_SM_";
        }

        if (valueOfVar.find("->") != CompilerKit::STLString::npos) {
          if (!kNasmOutput) {
            CompilerKit::STLString value = "__ptr __offset ";
            valueOfVar.erase(0, valueOfVar.find("->") + strlen("->"));
            // NOTE(review): same insert(pos, str, subpos) form as above -- confirm intent.
            valueOfVar.insert(0, value, value.size());
          } else {
            valueOfVar.erase(0, valueOfVar.find("->") + strlen("->"));
          }
          mangled = "__NECTAR_RM_";
        }

        // Right-hand side is a call: keep only the callee name.
        if (valueOfVar.find(")") != CompilerKit::STLString::npos) {
          if (valueOfVar.find("(") != CompilerKit::STLString::npos)
            valueOfVar.erase(valueOfVar.find("("));

          if (!valueOfVar.empty()) {
            // Track as potential external symbol for NASM
            kExternalSymbols.insert(mangled + valueOfVar);

            if (!kNasmOutput) {
              if (valueOfVar.ends_with(")") &&
                      valueOfVar.find("->") != CompilerKit::STLString::npos ||
                  valueOfVar.find(".") != CompilerKit::STLString::npos)
                syntax_tree.fUserValue += instr + nectar_get_variable_ref(varName) +
                                          ", __thiscall " + mangled + valueOfVar + "\n";
              else
                syntax_tree.fUserValue +=
                    instr + nectar_get_variable_ref(varName) + ", " + mangled + valueOfVar + "\n";
            } else {
              // NASM: defer the store of rax; the access case appends it after the call.
              syntax_rem_buffer = instr + nectar_get_variable_ref(varName) + ", rax\n";
            }
          }

          break;
        }

        if (valueOfVar.ends_with("{}")) valueOfVar = "rax";  // impl init returns back to rax.

        syntax_tree.fUserValue +=
            instr + nectar_get_variable_ref(varName) + ", " + valueOfVar + "\n";

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindExport: {
        // Declares `_<name>` as a public code symbol; <name> is the text after the keyword with
        // the ';' tail and all spaces removed.
        auto tmp = text;

        if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";"));

        while (tmp.find(" ") != CompilerKit::STLString::npos) {
          tmp.erase(tmp.find(" "), 1);
        }

        if (!kNasmOutput)
          syntax_tree.fUserValue +=
              "public_segment .code64 _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";
        else
          syntax_tree.fUserValue +=
              "section .text\nglobal _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindImport: {
        // Declares `_<name>` as an external symbol (`.zero64` / `section .data`).
        auto tmp = text;

        if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";"));

        while (tmp.find(" ") != CompilerKit::STLString::npos) {
          tmp.erase(tmp.find(" "), 1);
        }

        if (!kNasmOutput)
          syntax_tree.fUserValue +=
              "extern_segment .zero64 _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";
        else
          syntax_tree.fUserValue +=
              "section .data\nextern _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindExtern: {
        // Declares `_<name>` as an external code symbol (`.code64` / `section .text`).
        auto tmp = text;

        if (tmp.find(";") != CompilerKit::STLString::npos) tmp.erase(tmp.find(";"));

        while (tmp.find(" ") != CompilerKit::STLString::npos) {
          tmp.erase(tmp.find(" "), 1);
        }

        if (!kNasmOutput)
          syntax_tree.fUserValue +=
              "extern_segment .code64 _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";
        else
          syntax_tree.fUserValue +=
              "section .text\nextern _" +
              tmp.substr(tmp.find(keyword.first.fKeywordName) + keyword.first.fKeywordName.size()) +
              "\n";

        break;
      }
      case CompilerKit::KeywordKind::kKeywordKindReturn: {
        // Any exception raised while parsing the return expression is answered by emitting a
        // bare epilogue + ret (see the catch block).
        try {
          auto pos = text.find("return");

          if (pos == CompilerKit::STLString::npos) {
            syntax_tree.fUserValue += nectar_generate_epilogue();
            syntax_tree.fUserValue += "ret\n";
            ++kOrigin;
            break;
          }

          // Skip "return" plus the character after it.
          pos += std::string("return").size() + 1;

          CompilerKit::STLString subText = text.substr(pos);

          subText = subText.erase(subText.find(";"));
          size_t indxReg = 0UL;

          // Extract and set up call arguments before erasing them
          if (subText.find("):") != CompilerKit::STLString::npos) {
            auto argStart = subText.find("(") + 1;
            auto argEnd = subText.find("):");

            if (argEnd != CompilerKit::STLString::npos && argEnd > argStart) {
              auto argsStr = subText.substr(argStart, argEnd - argStart);
              auto regIdx = 9;

              // Walk one position past the end so the final argument is flushed too.
              CompilerKit::STLString currentArg;
              for (std::size_t i = 0; i <= argsStr.size(); ++i) {
                if (i == argsStr.size() || argsStr[i] == ',') {
                  while (!currentArg.empty() && currentArg[0] == ' ') currentArg.erase(0, 1);
                  while (!currentArg.empty() && currentArg.back() == ' ') currentArg.pop_back();

                  if (!currentArg.empty() && regIdx <= 15) {
                    auto val = nectar_get_variable_ref(currentArg);
                    if (val.empty()) val = currentArg;

                    syntax_tree.fUserValue +=
                        "mov r" + std::to_string(regIdx) + ", " + val + "\n";
                    ++regIdx;
                  }

                  currentArg.clear();
                } else {
                  currentArg += argsStr[i];
                }
              }
            }

            subText.erase(subText.find("("));
          }

          auto ref = nectar_get_variable_ref(subText);

          // Returning a known variable: its address is loaded into rax.
          if (ref.empty() == false) syntax_tree.fUserValue += "lea rax, " + ref + "\n";

          if (subText.starts_with("'") || isnumber(subText[0]))
            syntax_tree.fUserValue += "mov rax, " + subText + "\n";
          else if (text.find("(") != CompilerKit::STLString::npos &&
                   text.find(");") != CompilerKit::STLString::npos) {
            // Track as potential external symbol for NASM.

            subText.erase(subText.find("("));

            for (const auto& keyword : kKeywords) {
              if (keyword.fKeywordName == subText)
                CompilerKit::Detail::print_error("A nectar keyword cannot be used there.", file);
            }

            kExternalSymbols.insert(subText);

            if (!kNasmOutput) {
              syntax_tree.fUserValue += "mov rax, __call " + subText + "\n";
            } else {
              // NASM: call function, result is in rax
              syntax_tree.fUserValue += "call " + subText + "\n";
            }
          }

          syntax_tree.fUserValue += nectar_generate_epilogue() + "ret\n";
          ++kOrigin;
        } catch (...) {
          syntax_tree.fUserValue += nectar_generate_epilogue() + "ret\n";
          ++kOrigin;
        }

        // Close a pending `if`: define the label the conditional jump was aimed at.
        if (kCurrentIfCondition) {
          if (!kNasmOutput)
            syntax_tree.fUserValue +=
                "public_segment .code64 __ret_" + kCurrentIfSymbol + "\nnop\n";
          else
            syntax_tree.fUserValue += "__ret_" + kCurrentIfSymbol + ":\n";

          kCurrentIfSymbol.clear();
          kCurrentIfCondition = false;
        }
        // NOTE(review): no `break` here, so control falls into `default` (which only
        // `continue`s).
      }
      default: {
        continue;
      }
    }
  }

  return this->CompileLayout(text, file, syntax_tree);
}

/// \brief Parse NECTAR Impls.
/// \param CompilerKit::SyntaxLeafList::SyntaxLeaf the leaf to build upon.
+CompilerKit::SyntaxLeafList::SyntaxLeaf CompilerFrontendNectarAMD64::CompileLayout( + CompilerKit::STLString& text, const CompilerKit::STLString& file, + CompilerKit::SyntaxLeafList::SyntaxLeaf& syntax_tree) { + if ((text.find("impl") != CompilerKit::STLString::npos)) { + CompilerKit::STLString keyword = "impl"; + auto classPos = text.find(keyword) + keyword.length(); + auto bracePos = text.find("{"); + + auto className = text.substr(classPos, bracePos - classPos); + + // Trim whitespace + while (!className.empty() && (className.front() == ' ' || className.front() == '\t')) { + className.erase(0, 1); + } + while (!className.empty() && (className.back() == ' ' || className.back() == '\t')) { + className.pop_back(); + } + + if (!className.empty()) { + nectar_push_scope(ScopeKind::kScopeClass, className); + ++kOnClassScope; + } + + syntax_tree.fUserValue += ";; HINT: " + className + "\n"; + } + + // Handle class exit + if (text.find("};") != CompilerKit::STLString::npos) { + --kOnClassScope; + nectar_pop_scope(); + + syntax_tree.fUserValue += ";; HINT: END NAMESPACE\n"; + } + + return syntax_tree; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// HELPER FUNCTION IMPLEMENTATIONS + +///////////////////////////////////////////////////////////////////////////////////////// + +/// \brief Push a new scope onto the scope stack +static void nectar_push_scope(ScopeKind kind, const CompilerKit::STLString& name) { + CompilerScope scope; + scope.fKind = kind; + scope.fName = name; + + // Build mangled prefix based on current scope stack + for (const auto& s : kContext.fScopeStack) { + if (s.fKind == ScopeKind::kScopeNamespace) { + scope.fMangledPrefix += "N_" + s.fName; + } else if (s.fKind == ScopeKind::kScopeClass) { + scope.fMangledPrefix += "C_" + s.fName; + } + } + + kContext.fScopeStack.push_back(scope); +} + +/// \brief Pop the current scope from the scope stack +static void nectar_pop_scope() { + if 
(!kContext.fScopeStack.empty()) { + kContext.fScopeStack.pop_back(); + } +} + +/// \brief Extract function arguments from function declaration +static std::vector nectar_extract_function_args( + const CompilerKit::STLString& text) { + std::vector args; + + auto openParen = text.find("("); + auto closeParen = text.find(")"); + + if (openParen == CompilerKit::STLString::npos || closeParen == CompilerKit::STLString::npos || + closeParen <= openParen) { + return args; + } + + auto argsText = text.substr(openParen + 1, closeParen - openParen - 1); + + // Trim whitespace + while (!argsText.empty() && (argsText.front() == ' ' || argsText.front() == '\t')) { + argsText.erase(0, 1); + } + while (!argsText.empty() && (argsText.back() == ' ' || argsText.back() == '\t')) { + argsText.pop_back(); + } + + if (argsText.empty() || argsText == "void") { + return args; + } + + // Simple comma-separated parsing + std::size_t pos = 0; + while (pos < argsText.size()) { + auto commaPos = argsText.find(",", pos); + if (commaPos == CompilerKit::STLString::npos) { + commaPos = argsText.size(); + } + + auto arg = argsText.substr(pos, commaPos - pos); + + // Extract type name (skip variable name) + std::size_t lastSpace = arg.rfind(' '); + if (lastSpace != CompilerKit::STLString::npos) { + arg = arg.substr(0, lastSpace); + } + + // Trim + while (!arg.empty() && (arg.front() == ' ' || arg.front() == '\t')) { + arg.erase(0, 1); + } + while (!arg.empty() && (arg.back() == ' ' || arg.back() == '\t')) { + arg.pop_back(); + } + + if (!arg.empty()) { + args.push_back(arg); + } + + pos = commaPos + 1; + } + + return args; +} + +/// \brief Mangle a function or method name according to Nectar mangling scheme +static CompilerKit::STLString nectar_mangle_name(const CompilerKit::STLString& identifier, + const std::vector& args) { + CompilerKit::STLString mangled = "__NECTAR_"; + + // Add scope chain + for (const auto& scope : kContext.fScopeStack) { + if (scope.fKind == ScopeKind::kScopeNamespace) { + 
mangled += "N_" + scope.fName; + } + } + + // Check if we're in a class scope for member functions + bool inClass = false; + + for (const auto& scope : kContext.fScopeStack) { + if (scope.fKind == ScopeKind::kScopeClass) { + inClass = true; + break; + } + } + + CompilerKit::STLString identifierCopy = identifier; + + if (auto pos = identifierCopy.find("let "); pos != CompilerKit::STLString::npos) { + identifierCopy = identifierCopy.substr(pos + 3); + } else if (auto pos = identifierCopy.find("const "); pos != CompilerKit::STLString::npos) { + identifierCopy = identifierCopy.substr(pos + 5); + } + + while (auto pos = identifierCopy.find(" ")) { + if (pos == CompilerKit::STLString::npos) break; + identifierCopy.erase(pos, 1); + } + + if (inClass) { + mangled += "M_" + identifierCopy; + } else { + return identifierCopy; + } + + return mangled; +} + +/// \brief Generate function prologue +static CompilerKit::STLString nectar_generate_prologue() { + return "push rbp\nmov rbp, rsp\n"; +} + +/// \brief Generate function epilogue +static CompilerKit::STLString nectar_generate_epilogue() { + return "mov rsp, rbp\npop rbp\n"; +} + +/// \brief Allocate a variable on the stack +static Int32 nectar_allocate_stack_variable(const CompilerKit::STLString& var_name, Int32 size, + bool is_constant) { + kContext.fStackOffset -= size; + kContext.fMaxStackUsed = std::min(kContext.fStackOffset, kContext.fMaxStackUsed); + + if (auto var = nectar_find_variable(var_name); var) { + if (var->fIsConstant) + CompilerKit::Detail::print_error( + "Variable " + var_name.substr(var_name.find("const") + strlen("const")) + " is constant.", + "CompilerKit"); + + if (var->fStackOffset > 0) + CompilerKit::Detail::print_error("Variable " + var_name + " is already defined.", + "CompilerKit"); + + delete var; + } + + VariableInfo varInfo; + varInfo.fName = var_name; + varInfo.fLocation = VarLocation::kStack; + varInfo.fStackOffset = kContext.fStackOffset; + varInfo.fSize = size; + varInfo.fLastUsed = 
kContext.fInstructionCounter; + varInfo.fIsConstant = is_constant; + kContext.fVariables.push_back(varInfo); + + return kContext.fStackOffset; +} + +/// \brief Find a variable by name +static VariableInfo* nectar_find_variable(const CompilerKit::STLString& var_name) { + for (auto& var : kContext.fVariables) { + if (var.fName == var_name) { + return new VariableInfo(var); + } + } + return nullptr; +} + +/// \brief Get variable reference (register or stack location) +static CompilerKit::STLString nectar_get_variable_ref(const CompilerKit::STLString& var_name, + bool lookup) { + auto* varInfo = nectar_find_variable(var_name); + + if (!varInfo || var_name.empty() || !isnumber(var_name[0])) { + if (!isnumber(var_name[0]) && lookup) + CompilerKit::Detail::print_error("Variable " + var_name + " not found.", "CompilerKit"); + } + + if (!varInfo) { + return ""; + } + + if (varInfo->fIsConstant) { + CompilerKit::Detail::print_error("Invalid use of constant " + + var_name.substr(var_name.find("const") + strlen("const")) + + " as variable.", + "CompilerKit"); + return "call __abort"; + } + + varInfo->fLastUsed = kContext.fInstructionCounter; + + if (varInfo->fLocation == VarLocation::kRegister) { + auto reg = varInfo->fRegister; + delete varInfo; + return reg; + } else { + // Stack or spilled + auto reg = "qword [rbp+" + std::to_string(-varInfo->fStackOffset) + "]"; + delete varInfo; + return reg; + } + + return ""; +} + +/// \brief Allocate a register for a variable +static CompilerKit::STLString nectar_allocate_register(const CompilerKit::STLString& var_name) { + // Check if variable already has a register + VariableInfo* existing = nullptr; + + for (auto& var : kContext.fVariables) { + if (var.fName == var_name) { + existing = &var; + break; + } + } + + if (existing && existing->fLocation == VarLocation::kRegister) { + return existing->fRegister; + } + + // Find a free register + for (const auto& reg : kRegisterList) { + bool inUse = false; + for (const auto& var : 
kContext.fVariables) { + if (var.fLocation == VarLocation::kRegister && var.fRegister == reg) { + inUse = true; + break; + } + } + + if (!inUse) { + // Allocate this register + if (existing) { + if (existing->fIsConstant) { + CompilerKit::Detail::print_error("Invalid use of constant " + var_name + " as variable.", + "CompilerKit"); + return "__call __abort"; + } + + existing->fLocation = VarLocation::kRegister; + existing->fRegister = reg; + existing->fLastUsed = kContext.fInstructionCounter; + } else { + VariableInfo varInfo; + varInfo.fName = var_name; + varInfo.fLocation = VarLocation::kRegister; + varInfo.fRegister = reg; + varInfo.fLastUsed = kContext.fInstructionCounter; + varInfo.fIsConstant = existing->fIsConstant; + + kContext.fVariables.push_back(varInfo); + } + return reg; + } + } + + // No free register + return ""; +} + +/// \brief Spill the least recently used variable to stack +static CompilerKit::STLString nectar_spill_lru_variable() { + CompilerKit::STLString spillCode; + + // Find LRU variable in register (that's not a parameter) + VariableInfo* lruVar = nullptr; + UInt32 minLastUsed = UINT32_MAX; + + for (auto& var : kContext.fVariables) { + if (var.fLocation == VarLocation::kRegister && !var.fIsParameter && + var.fLastUsed < minLastUsed) { + lruVar = &var; + minLastUsed = var.fLastUsed; + } + } + + if (!lruVar) { + return ""; // No variable to spill + } + + // Allocate stack space + kContext.fStackOffset -= lruVar->fSize; + kContext.fMaxStackUsed = std::min(kContext.fStackOffset, kContext.fMaxStackUsed); + + // Generate spill code + + /// if impl init + if (!lruVar->fRegister.ends_with("{}")) + spillCode = "mov qword [rbp+" + std::to_string(-kContext.fStackOffset) + "], " + + lruVar->fRegister + "\n"; + else + spillCode = "mov qword [rbp+" + std::to_string(-kContext.fStackOffset) + "], rax\n"; + + // Update variable info + lruVar->fLocation = VarLocation::kStackSpill; + lruVar->fStackOffset = kContext.fStackOffset; + auto spilledReg = 
lruVar->fRegister; + lruVar->fRegister = ""; + + return spillCode; +} + +/// \brief Add a class member to the struct map +static void nectar_add_impl_member(const CompilerKit::STLString& class_name, + const CompilerKit::STLString& member_name, Int32 size) { + // Find or create struct map entry + CompilerStructMap* structMap = nullptr; + for (auto& sm : kContext.fStructMapVector) { + if (sm.fName == class_name) { + structMap = &sm; + break; + } + } + + if (!structMap) { + CompilerStructMap newMap; + newMap.fName = class_name; + kContext.fStructMapVector.push_back(newMap); + structMap = &kContext.fStructMapVector.back(); + } + + // Calculate offset + UInt32 offset = 0; + if (!structMap->fOffsets.empty()) { + offset = structMap->fOffsets.back().first + 8; // Assume 8-byte members for now + } + + structMap->fOffsets.emplace_back(offset, member_name); +} + +/// \brief Get the size of a class +static Int32 nectar_get_impl_size(const CompilerKit::STLString& class_name) { + for (const auto& sm : kContext.fStructMapVector) { + if (sm.fName == class_name) { + if (sm.fOffsets.empty()) { + return 0; + } + return sm.fOffsets.back().first + 8; // Last offset + size + } + } + return 0; +} + +/// \brief Generate constructor call +static CompilerKit::STLString nectar_generate_constructor_call( + const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name) { + auto size = nectar_get_impl_size(class_name); + auto offset = nectar_allocate_stack_variable( + obj_name, size == 0 ? 
8 : size, obj_name.find("_const_") != CompilerKit::STLString::npos); + + nectar_push_scope(ScopeKind::kScopeClass, class_name); + auto ctor_mangled = nectar_mangle_name(class_name); + nectar_pop_scope(); + + CompilerKit::STLString code; + code += "lea r8, [rbp+" + std::to_string(offset) + "]\n"; + code += "call " + ctor_mangled + "\n"; + return code; +} + +/// \brief Generate destructor call +static CompilerKit::STLString nectar_generate_destructor_call( + const CompilerKit::STLString& class_name, const CompilerKit::STLString& obj_name) { + auto* varInfo = nectar_find_variable(obj_name); + + if (!varInfo) { + return ""; + } + + nectar_push_scope(ScopeKind::kScopeClass, class_name); + auto dtor_mangled = nectar_mangle_name("~" + class_name); + nectar_pop_scope(); + + CompilerKit::STLString code; + if (varInfo->fLocation == VarLocation::kStack || varInfo->fLocation == VarLocation::kStackSpill) { + code += "lea r8, [rbp+" + std::to_string(varInfo->fStackOffset) + "]\n"; + } else { + code += "mov r8, " + varInfo->fRegister + "\n"; + } + + delete varInfo; + + code += "call " + dtor_mangled + "\n"; + return code; +} + +/// \brief Process function parameters per PEF calling convention. +/// \note Assumes args are already extracted. 
+static void nectar_process_function_parameters(const std::vector& args) { + for (size_t i = 0; i < args.size() && i < 8; ++i) { + VariableInfo param; + param.fName = "arg" + std::to_string(i); + param.fLocation = VarLocation::kRegister; + param.fRegister = kRegisterConventionCallList[i]; + param.fIsParameter = true; + param.fTypeName = args[i]; + param.fLastUsed = kContext.fInstructionCounter; + kContext.fVariables.push_back(param); + } + + // Args beyond r15 go on stack + for (size_t i = 8; i < args.size(); ++i) { + Int32 offset = 16 + (i - 8) * 8; + VariableInfo param; + param.fName = "arg" + std::to_string(i); + param.fLocation = VarLocation::kStack; + param.fStackOffset = offset; // Positive (before rbp) + param.fIsParameter = true; + param.fTypeName = args[i]; + param.fLastUsed = kContext.fInstructionCounter; + kContext.fVariables.push_back(param); + } +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/** + * @brief NECTAR assembler class. 
+ */ + +///////////////////////////////////////////////////////////////////////////////////////// + +#define kExtListCxx {".nc", ".pp.nc"} + +class AssemblyNectarInterfaceAMD64 final CK_ASSEMBLY_INTERFACE { + public: + explicit AssemblyNectarInterfaceAMD64() = default; + ~AssemblyNectarInterfaceAMD64() override = default; + + NECTAR_COPY_DEFAULT(AssemblyNectarInterfaceAMD64); + + UInt32 Arch() noexcept override { return CompilerKit::AssemblyFactory::kArchAMD64; } + + Int32 CompileToFormat(CompilerKit::STLString src, Int32 arch) override { + if (kFrontend == nullptr) return EXIT_FAILURE; + + CompilerKit::STLString dest = src; + std::vector ext = kExtListCxx; + + dest.erase(dest.find(ext[0])); + + dest += ".masm"; + + std::ofstream out_fp(dest); + std::ifstream src_fp = std::ifstream(src); + + CompilerKit::STLString line_source; + + std::stringstream ss; + ss << std::hex << kOrigin; + + // Clear symbol tracking sets for this compilation unit + kDefinedSymbols.clear(); + kExternalSymbols.clear(); + + // First pass: compile all lines and collect symbols + CompilerKit::STLString compiledCode; + std::size_t lastRes{}; + std::string prevRes; + std::string nextRes; + + while (std::getline(src_fp, line_source)) { + auto res = kFrontend->Compile(line_source, src); + if (kAcceptableErrors > 0) return EXIT_FAILURE; + + if (res.fPlaceType == CompilerKit::SyntaxLeafList::SyntaxLeaf::kPlaceBefore) { + compiledCode.insert(compiledCode.find(prevRes), res.fUserValue, 0, res.fUserValue.size()); + } else if (res.fPlaceType == CompilerKit::SyntaxLeafList::SyntaxLeaf::kPlaceAfter) { + nextRes = res.fUserValue; + continue; + } else { + compiledCode += res.fUserValue; + if (!nextRes.empty()) { + compiledCode += nextRes; + nextRes.clear(); + } + } + + lastRes = res.fUserValue.size(); + prevRes = res.fUserValue; + } + + // Output header + if (!kNasmOutput) + out_fp << "%bits 64\n"; + else { + out_fp << "[bits 64]\n"; + out_fp << "extern __operator_new\nextern __operator_delete\n"; + } + + 
// For NASM output: emit extern declarations for undefined symbols + if (kNasmOutput) { + for (const auto& sym : kExternalSymbols) { + // Only declare as extern if not defined in this file + if (kDefinedSymbols.find(sym) == kDefinedSymbols.end() && !sym.empty()) { + out_fp << "extern " << sym << "\n"; + } + } + if (!kExternalSymbols.empty()) { + out_fp << "\n"; + } + } + + // Output compiled code + out_fp << compiledCode; + + return EXIT_SUCCESS; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// + +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(CompilerNectarAMD64) { + bool skip = false; + + kKeywords.emplace_back("impl", CompilerKit::KeywordKind::kKeywordKindImpl); + kKeywords.emplace_back("trait", CompilerKit::KeywordKind::kKeywordKindTrait); + kKeywords.emplace_back("{", CompilerKit::KeywordKind::kKeywordKindBodyStart); + kKeywords.emplace_back("}", CompilerKit::KeywordKind::kKeywordKindBodyEnd); + kKeywords.emplace_back("{}", CompilerKit::KeywordKind::kKeywordKindImplInit); + kKeywords.emplace_back("(", CompilerKit::KeywordKind::kKeywordKindFunctionStart); + kKeywords.emplace_back(")", CompilerKit::KeywordKind::kKeywordKindFunctionEnd); + kKeywords.emplace_back(":=", CompilerKit::KeywordKind::kKeywordKindVariableAssign); + kKeywords.emplace_back("+=", CompilerKit::KeywordKind::kKeywordKindVariableInc); + kKeywords.emplace_back("-=", CompilerKit::KeywordKind::kKeywordKindVariableDec); + kKeywords.emplace_back("const", CompilerKit::KeywordKind::kKeywordKindVariable); + kKeywords.emplace_back("let", CompilerKit::KeywordKind::kKeywordKindVariable); + kKeywords.emplace_back("new", CompilerKit::KeywordKind::kKeywordKindNew); + kKeywords.emplace_back("delete", CompilerKit::KeywordKind::kKeywordKindDelete); + kKeywords.emplace_back(".", CompilerKit::KeywordKind::kKeywordKindAccess); + kKeywords.emplace_back("->", 
CompilerKit::KeywordKind::kKeywordKindAccessChecked); + kKeywords.emplace_back("(", CompilerKit::KeywordKind::kKeywordKindFunctionAccess); + kKeywords.emplace_back(";", CompilerKit::KeywordKind::kKeywordKindEndLine); + kKeywords.emplace_back("return", CompilerKit::KeywordKind::kKeywordKindReturn); + kKeywords.emplace_back("extern", CompilerKit::KeywordKind::kKeywordKindExtern); + kKeywords.emplace_back("import", CompilerKit::KeywordKind::kKeywordKindImport); + kKeywords.emplace_back("export", CompilerKit::KeywordKind::kKeywordKindExport); + + kKeywords.emplace_back("if", CompilerKit::KeywordKind::kKeywordKindIf); + + kErrorLimit = 0; + + kFrontend = new CompilerFrontendNectarAMD64(); + + CompilerKit::StrongRef mntPnt{new AssemblyNectarInterfaceAMD64()}; + kAssembler.Mount({mntPnt.Leak()}); + + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + // Ensure cleanup on exit + std::atexit([]() { + delete kFrontend; + kFrontend = nullptr; + }); + + for (auto index = 1UL; index < argc; ++index) { + if (!argv[index]) break; + + if (argv[index][0] == '-') { + if (skip) { + skip = false; + continue; + } + + if (strcmp(argv[index], "-fverbose") == 0) { + kVerbose = true; + continue; + } + + if (strcmp(argv[index], "-fuse-masm") == 0) { + kNasmOutput = false; + continue; + } + + if (strcmp(argv[index], "-fuse-nasm") == 0) { + kNasmOutput = true; + continue; + } + + if (strcmp(argv[index], "-fprint-dialect") == 0) { + if (kFrontend) std::cout << kFrontend->Language() << "\n"; + + return NECTAR_SUCCESS; + } + + CompilerKit::STLString err = "Unknown option: "; + err += argv[index]; + + CompilerKit::Detail::print_error(err, "Nectar"); + + continue; + } + + CompilerKit::STLString argv_i = argv[index]; + + std::vector exts = kExtListCxx; + + for (CompilerKit::STLString ext : exts) { + if (argv_i.ends_with(ext)) { + if (kAssembler.Compile(argv_i, kMachine) != EXIT_SUCCESS) { + return NECTAR_INVALID_DATA; + } + + break; + } + } + } + + 
kAssembler.Unmount(); + + return NECTAR_SUCCESS; +} + +// +// Last rev 25-8-7 +// diff --git a/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cc b/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cc deleted file mode 100644 index dcbd7e7..0000000 --- a/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cc +++ /dev/null @@ -1,705 +0,0 @@ -// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// @author Amlal El Mahrouss (amlal@nekernel.org) -/// @brief NeKernel.org 64-bit Mach-O Linker. -/// Last Rev: 2026 -/// @note Outputs Mach-O executables with __TEXT and __DATA segments. - -#ifdef CK_USE_MACHO_LINKER - -#include -#include -#include -#include -#include -#include -#include -#include - -#define kLatestOSX (15) - -#define kMachODefaultEntrypoint "_main" -#define kMachODefaultOutput {"a" kMachOExt} - -#define kLinkerVersionStr "Nectar 64-Bit Linker (OS X Mach-O)" - -#define kLinkerSplash() kStdOut << kLinkerVersionStr << kStdEndl - -#define kConsoleOut \ - (std::cout << "\e[0;31m" \ - << "mld64: " \ - << "\e[0;97m") - -static CompilerKit::STLString kOutput = kMachODefaultOutput; -static cpu_type_t kCpuType = CPU_TYPE_X86_64; -static cpu_subtype_t kCpuSubType = CPU_SUBTYPE_X86_64_ALL; -static bool kFatBinaryEnable = false; -static bool kStartFound = false; -static bool kDuplicateSymbols = false; -static bool kIsDylib = false; -static Int64 kMachODefaultStackSz = 0; - -static CompilerKit::STLString kLinkerStart = kMachODefaultEntrypoint; - -/* object code and list. 
*/ -static std::vector kObjectList; -static std::vector kTextBytes; -static std::vector kDataBytes; - -/* @brief symbol tables */ -static std::vector kSymbolTable; -static std::vector kStringTable; -static std::map kSymbolOffsets; - -/// @brief Structure to hold section information from AE records -struct SectionInfo { - CompilerKit::STLString name; - UInt32 kind; - std::vector bytes; - UInt64 address; - UInt64 size; -}; - -using SectionInfoVec = std::vector; - -/// @brief Extract clean symbol name from AE record name -/// AE format: ".code64$symbolname" or "symbolname.code64" -static CompilerKit::STLString macho_extract_symbol_name(const CompilerKit::STLString& aeName) { - CompilerKit::STLString name = aeName; - - // Remove section prefixes/suffixes - const Char* sections[] = {".code64", ".data64", ".zero64", "$"}; - - for (const auto& sec : sections) { - size_t pos; - while ((pos = name.find(sec)) != CompilerKit::STLString::npos) { - name.erase(pos, strlen(sec)); - } - } - - // Trim whitespace - while (!name.empty() && (name.front() == ' ' || name.front() == '\t')) { - name.erase(0, 1); - } - while (!name.empty() && (name.back() == ' ' || name.back() == '\t')) { - name.pop_back(); - } - - return name; -} - -/// @brief Add a symbol to the symbol table -static UInt32 macho_add_symbol(const CompilerKit::STLString& name, uint8_t type, uint8_t sect, - UInt64 value) { - // Add name to string table (offset 0 is reserved for empty string) - if (kStringTable.empty()) { - kStringTable.push_back('\0'); // First byte is null - } - - UInt32 strOffset = static_cast(kStringTable.size()); - - for (Char c : name) { - kStringTable.push_back(c); - } - kStringTable.push_back('\0'); - - // Create nlist_64 entry - nlist_64 sym{}; - sym.n_un.n_strx = strOffset; - sym.n_type = type; - sym.n_sect = sect; - sym.n_desc = 0; - sym.n_value = value; - - kSymbolTable.push_back(sym); - kSymbolOffsets[name] = value; - - return static_cast(kSymbolTable.size() - 1); -} - -/// @brief Nectar 64-bit 
Mach-O Linker. -/// @note This linker outputs Mach-O executables for macOS/iOS. -NECTAR_MODULE(DynamicLinker64MachO) { - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - /** - * @brief parse flags and trigger options. - */ - for (size_t linker_arg{1}; linker_arg < argc; ++linker_arg) { - if (std::strcmp(argv[linker_arg], "-help") == 0) { - kLinkerSplash(); - - kConsoleOut << "-version: Show linker version.\n"; - kConsoleOut << "-help: Show linker help.\n"; - kConsoleOut << "-verbose: Enable linker trace.\n"; - kConsoleOut << "-fdylib: Output as a Dynamic Library.\n"; - kConsoleOut << "-ffat: Output as a FAT binary.\n"; - kConsoleOut << "-famd64: Output as an x86_64 Mach-O.\n"; - kConsoleOut << "-farm64: Output as an ARM64 Mach-O.\n"; - kConsoleOut << "-output: Select the output file name.\n"; - kConsoleOut << "-fstart: Specify entry point symbol.\n"; - - return NECTAR_SUCCESS; - } else if (std::strcmp(argv[linker_arg], "-version") == 0) { - kLinkerSplash(); - - return NECTAR_SUCCESS; - } else if (std::strcmp(argv[linker_arg], "-ffat") == 0) { - kFatBinaryEnable = true; - - continue; - } else if (std::strcmp(argv[linker_arg], "-famd64") == 0) { - kCpuType = CPU_TYPE_X86_64; - kCpuSubType = CPU_SUBTYPE_X86_64_ALL; - - continue; - } else if (std::strcmp(argv[linker_arg], "-farm64") == 0) { - kCpuType = CPU_TYPE_ARM64; - kCpuSubType = CPU_SUBTYPE_ARM64_ALL; - - continue; - } else if (std::strcmp(argv[linker_arg], "-fstart") == 0) { - if (argv[linker_arg + 1] == nullptr || argv[linker_arg + 1][0] == '-') continue; - - kLinkerStart = argv[linker_arg + 1]; - linker_arg += 1; - - continue; - } else if (std::strcmp(argv[linker_arg], "-verbose") == 0) { - kVerbose = true; - - continue; - } else if (std::strcmp(argv[linker_arg], "-fdylib") == 0) { - kIsDylib = true; - - if (kOutput.find(kMachOExt) != CompilerKit::STLString::npos) { - kOutput.erase(kOutput.find(kMachOExt), strlen(kMachOExt)); - kOutput += kMachODylibExt; - } - - continue; - } 
else if (std::strcmp(argv[linker_arg], "-output") == 0) { - if ((linker_arg + 1) > argc) continue; - - kOutput = argv[linker_arg + 1]; - ++linker_arg; - - continue; - } else { - if (argv[linker_arg][0] == '-') { - kConsoleOut << "unknown option: " << argv[linker_arg] << "\n"; - return EXIT_FAILURE; - } - - kObjectList.emplace_back(argv[linker_arg]); - - continue; - } - } - - if (kOutput.empty()) { - kConsoleOut << "no output filename set." << std::endl; - return NECTAR_EXEC_ERROR; - } else if (kObjectList.empty()) { - kConsoleOut << "no input files." << std::endl; - return NECTAR_EXEC_ERROR; - } else { - namespace FS = std::filesystem; - - // check for existing files, if they don't throw an error. - for (auto& obj : kObjectList) { - if (!FS::exists(obj)) { - kConsoleOut << "no such file: " << obj << std::endl; - return NECTAR_EXEC_ERROR; - } - } - } - - SectionInfoVec sections; - CompilerKit::Utils::AEReadableProtocol reader_protocol{}; - - entry_point_command entryCommand{}; - entryCommand.stacksize = kMachODefaultStackSz; - - // Collect all text and data from AE object files - for (const auto& objectFile : kObjectList) { - if (!std::filesystem::exists(objectFile)) continue; - - CompilerKit::AEHeader hdr{}; - - reader_protocol.fFilePtr = std::ifstream(objectFile, std::ifstream::binary); - reader_protocol.fFilePtr >> hdr; - - if (hdr.fMagic[0] == kAEMag0 && hdr.fMagic[1] == kAEMag1 && - hdr.fSize == sizeof(CompilerKit::AEHeader) && hdr.fMagic[2] == kAEMag2) { - std::size_t cnt = hdr.fCount; - - if (kVerbose) kConsoleOut << "header found, record count: " << cnt << "\n"; - - Char* raw_ae_records = new Char[cnt * sizeof(CompilerKit::AERecordHeader)]; - - if (!raw_ae_records) { - if (kVerbose) kConsoleOut << "allocation failed for records of count: " << cnt << "\n"; - return NECTAR_EXEC_ERROR; - } - - std::memset(raw_ae_records, 0, cnt * sizeof(CompilerKit::AERecordHeader)); - - auto* ae_records = reader_protocol.Read(raw_ae_records, cnt); - - for (size_t 
ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) { - SectionInfo section; - section.name = ae_records[ae_record_index].fName; - section.kind = ae_records[ae_record_index].fKind; - section.size = ae_records[ae_record_index].fSize; - - // Extract clean symbol name and add to symbol table - CompilerKit::STLString symbolName = macho_extract_symbol_name(section.name); - - if (!symbolName.empty()) { - // Determine section number (1 = __text, 2 = __data) - uint8_t sectNum = 0; - if (section.kind & CompilerKit::kPefCode) { - sectNum = 1; // __text section - } else if (section.kind & CompilerKit::kPefData) { - sectNum = 2; // __data section - } else if (section.kind & CompilerKit::kPefZero) { - sectNum = 3; // __bss section - } - - // N_EXT = external, N_SECT = defined in section - uint8_t symType = N_EXT | N_SECT; - - macho_add_symbol(symbolName, symType, sectNum, ae_records[ae_record_index].fOffset); - - if (kVerbose) { - kConsoleOut << "added symbol: " << symbolName - << " at offset: " << ae_records[ae_record_index].fOffset << "\n"; - } - } - - sections.push_back(section); - } - - // Look up entry point from symbol table - auto entryIt = kSymbolOffsets.find(kLinkerStart); - if (entryIt != kSymbolOffsets.end()) { - entryCommand.entryoff = entryIt->second; - kStartFound = true; - - if (kVerbose) { - kConsoleOut << "found entry point " << kLinkerStart << " at offset: " << entryIt->second - << "\n"; - } - } - - delete[] raw_ae_records; - - // Read the actual code bytes - std::vector bytes; - bytes.resize(hdr.fCodeSize); - - reader_protocol.fFilePtr.seekg(std::streamsize(hdr.fStartCode)); - reader_protocol.fFilePtr.read(bytes.data(), std::streamsize(hdr.fCodeSize)); - - // Separate code and data based on section kind - for (auto& section : sections) { - if (section.kind == CompilerKit::kPefCode) { - kTextBytes.push_back({.mBlob = bytes, .mOffset = 0}); - } else if (section.kind == CompilerKit::kPefData) { - kDataBytes.push_back({.mBlob = bytes, .mOffset = 0}); - 
} - } - - reader_protocol.fFilePtr.close(); - continue; - } - - kConsoleOut << "not an object container: " << objectFile << std::endl; - return NECTAR_EXEC_ERROR; - } - - // Check for entry point in executables - if (!kStartFound && !kIsDylib) { - kConsoleOut << "undefined entrypoint " << kLinkerStart << " for executable: " << kOutput - << "\n"; - } - - // Calculate sizes - UInt64 textSize = 0; - UInt64 dataSize = 0; - - for (auto& blob : kTextBytes) { - textSize += blob.mBlob.size(); - } - - for (auto& blob : kDataBytes) { - dataSize += blob.mBlob.size(); - } - - // Open output file - std::ofstream output_fc(kOutput, std::ofstream::binary); - - if (output_fc.bad()) { - if (kVerbose) { - kConsoleOut << "error: " << strerror(errno) << "\n"; - } - return NECTAR_FILE_NOT_FOUND; - } - - using namespace CompilerKit::MachO; - - UInt32 numCommands = 8; // __PAGEZERO, LC_BUILD_VERSION, __TEXT, __LINKEDIT, LC_LOAD_DYLINKER, - // LC_UUID, LC_SYMTAB, LC_DYSYMTAB - - if (!kIsDylib) { - numCommands += 1; // LC_MAIN - } - - UInt32 dataSegCmdSize = - kDataBytes.size() > 0 ? 
sizeof(segment_command_64) + sizeof(section_64) : 0; - - if (dataSegCmdSize > 0) ++numCommands; // __DATA segment - - UInt32 sizeOfCmds = 0; - UInt32 headerSize = sizeof(mach_header_64); - UInt32 pageZeroSize = sizeof(segment_command_64); - UInt32 textSegCmdSize = sizeof(segment_command_64) + sizeof(section_64); - UInt32 buildCmdSize = sizeof(build_version_command); - UInt32 mainCmdSize = sizeof(entry_point_command); - UInt32 uuidCmdSize = sizeof(uuid_command); - UInt32 symtabCmdSize = sizeof(symtab_command); - UInt32 dysymtabCmdSize = sizeof(dysymtab_command); - UInt32 linkeditCmdSize = sizeof(segment_command_64); // No sections - UInt32 dylinkerCmdSize = - (strlen(dylinker_command) + 13 + 1 + 7) & ~7; // "/usr/lib/dyld" + padding to 8-byte align - - sizeOfCmds = pageZeroSize + textSegCmdSize + dataSegCmdSize + buildCmdSize + uuidCmdSize + - symtabCmdSize + dysymtabCmdSize + linkeditCmdSize + dylinkerCmdSize; - - if (!kIsDylib) sizeOfCmds += mainCmdSize; - - UInt64 headerAndCmdsSize = headerSize + sizeOfCmds; - UInt64 textFileOffset = AlignToPage(headerAndCmdsSize); - UInt64 textVMAddr = kDefaultBaseAddress; - UInt64 textSegmentSize = AlignToPage(textSize > 0 ? textSize : kPageSize); - UInt64 textVMSize = textFileOffset + textSegmentSize; // __TEXT includes header - - UInt64 dataFileOffset = textFileOffset + textSegmentSize; - UInt64 dataVMAddr = textVMAddr + textVMSize; - UInt64 dataSegmentSize = dataSize > 0 ? AlignToPage(dataSize) : 0; // 0 if no data - - // __LINKEDIT segment comes after data segment (or __TEXT if no data) - UInt64 linkeditFileOffset = - dataSegmentSize > 0 ? dataFileOffset + dataSegmentSize : textFileOffset + textSegmentSize; - UInt64 linkeditVMAddr = - dataSegmentSize > 0 ? 
dataVMAddr + dataSegmentSize : textVMAddr + textVMSize; - UInt64 symtabFileOffset = linkeditFileOffset; - UInt64 strtabFileOffset = symtabFileOffset + (kSymbolTable.size() * sizeof(nlist_64)); - UInt64 linkeditFileSize = (kSymbolTable.size() * sizeof(nlist_64)) + kStringTable.size(); - UInt64 linkeditVMSize = AlignToPage(linkeditFileSize > 0 ? linkeditFileSize : 1); - - // Write Mach-O header - mach_header_64 header{}; - - header.magic = MH_MAGIC_64; - header.cputype = kCpuType; - header.cpusubtype = kCpuSubType; - header.filetype = kIsDylib ? MH_DYLIB : MH_EXECUTE; - header.ncmds = numCommands; - header.sizeofcmds = sizeOfCmds; - header.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE; - header.reserved = 0; - - output_fc.write(reinterpret_cast(&header), sizeof(header)); - - if (kVerbose) { - kConsoleOut << "Wrote Mach-O header, ncmds: " << numCommands << "\n"; - } - - segment_command_64 pageZeroSegment{}; - pageZeroSegment.cmd = LC_SEGMENT_64; - pageZeroSegment.cmdsize = sizeof(segment_command_64); - CopySegmentName(pageZeroSegment.segname, kSegmentPageZero); - pageZeroSegment.vmaddr = 0; - pageZeroSegment.vmsize = 0x100000000; - pageZeroSegment.fileoff = 0; - pageZeroSegment.filesize = 0; - pageZeroSegment.maxprot = 0; - pageZeroSegment.initprot = 0; - pageZeroSegment.nsects = 0; - pageZeroSegment.flags = 0; - - output_fc.write(reinterpret_cast(&pageZeroSegment), sizeof(pageZeroSegment)); - - build_version_command build = {.cmd = LC_BUILD_VERSION, - .cmdsize = sizeof(build_version_command), - .platform = PLATFORM_MACOS, - .minos = (kLatestOSX << 16), // macOS 11.0 - .sdk = (kLatestOSX << 16), // macOS 11.0 - .ntools = 0}; - - output_fc.write(reinterpret_cast(&build), sizeof(build)); - - if (kVerbose) { - kConsoleOut << "Wrote LC_BUILD_VERSION, platform: macOS, minos: 11.0, sdk: 11.0\n"; - } - - // Write __TEXT segment command - segment_command_64 textSegment{}; - textSegment.cmd = LC_SEGMENT_64; - textSegment.cmdsize = sizeof(segment_command_64) + 
sizeof(section_64); // 1 section - CopySegmentName(textSegment.segname, kSegmentText); - textSegment.vmaddr = textVMAddr; - textSegment.vmsize = textVMSize; // Header + code (page-aligned) - textSegment.fileoff = 0; // Must include Mach-O header - textSegment.filesize = - dataSegmentSize > 0 ? dataFileOffset : linkeditFileOffset; // Extend to next segment - textSegment.maxprot = VM_PROT_READ | VM_PROT_EXECUTE; - textSegment.initprot = VM_PROT_READ | VM_PROT_EXECUTE; - textSegment.nsects = 1; - textSegment.flags = 0; - - output_fc.write(reinterpret_cast(&textSegment), sizeof(textSegment)); - - // Write __text section header - section_64 textSection{}; - CopySegmentName(textSection.sectname, kSectionText); - CopySegmentName(textSection.segname, kSegmentText); - textSection.addr = textVMAddr + textFileOffset; // Section is at offset within segment - textSection.size = textSize; - textSection.offset = static_cast(textFileOffset); - textSection.align = kSectionAlign; - textSection.reloff = 0; - textSection.nreloc = 0; - textSection.flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; - textSection.reserved1 = 0; - textSection.reserved2 = 0; - textSection.reserved3 = 0; - - output_fc.write(reinterpret_cast(&textSection), sizeof(textSection)); - - if (kVerbose) { - kConsoleOut << "Wrote __TEXT segment, vmaddr: 0x" << std::hex << textVMAddr << std::dec << "\n"; - kConsoleOut << " __text section, size: " << textSize << " bytes\n"; - } - - // Write __DATA segment command - segment_command_64 dataSegment{}; - dataSegment.cmd = LC_SEGMENT_64; - dataSegment.cmdsize = sizeof(segment_command_64) + sizeof(section_64); // 1 section - CopySegmentName(dataSegment.segname, kSegmentData); - dataSegment.vmaddr = dataVMAddr; - dataSegment.vmsize = dataSegmentSize; - dataSegment.fileoff = dataFileOffset; - dataSegment.filesize = dataSize; - dataSegment.maxprot = VM_PROT_READ | VM_PROT_WRITE; - dataSegment.initprot = VM_PROT_READ | VM_PROT_WRITE; - dataSegment.nsects = 1; - 
dataSegment.flags = 0; - - if (dataSegCmdSize > 0) - output_fc.write(reinterpret_cast(&dataSegment), sizeof(dataSegment)); - - // Write __data section header - section_64 dataSection{}; - CopySegmentName(dataSection.sectname, kSectionData); - CopySegmentName(dataSection.segname, kSegmentData); - dataSection.addr = dataVMAddr; - dataSection.size = dataSize; - dataSection.offset = static_cast(dataFileOffset); - dataSection.align = kSectionAlign; - dataSection.reloff = 0; - dataSection.nreloc = 0; - dataSection.flags = 0; - dataSection.reserved1 = 0; - dataSection.reserved2 = 0; - dataSection.reserved3 = 0; - - if (dataSegCmdSize > 0) - output_fc.write(reinterpret_cast(&dataSection), sizeof(dataSection)); - - if (kVerbose) { - kConsoleOut << "Wrote __DATA segment, vmaddr: 0x" << std::hex << dataVMAddr << std::dec << "\n"; - kConsoleOut << " __data section, size: " << dataSize << " bytes\n"; - } - - // Write __LINKEDIT segment command (contains symbol/string tables) - segment_command_64 linkeditSegment{}; - linkeditSegment.cmd = LC_SEGMENT_64; - linkeditSegment.cmdsize = sizeof(segment_command_64); // No sections - CopySegmentName(linkeditSegment.segname, "__LINKEDIT"); - linkeditSegment.vmaddr = linkeditVMAddr; - linkeditSegment.vmsize = linkeditVMSize; - linkeditSegment.fileoff = linkeditFileOffset; - linkeditSegment.filesize = linkeditFileSize; - linkeditSegment.maxprot = VM_PROT_READ; - linkeditSegment.initprot = VM_PROT_READ; - linkeditSegment.nsects = 0; - linkeditSegment.flags = 0; - - output_fc.write(reinterpret_cast(&linkeditSegment), sizeof(linkeditSegment)); - - if (kVerbose) { - kConsoleOut << "Wrote __LINKEDIT segment, vmaddr: 0x" << std::hex << linkeditVMAddr << std::dec - << ", fileoff: " << linkeditFileOffset << ", filesize: " << linkeditFileSize - << "\n"; - } - - // Write LC_LOAD_DYLINKER command - constexpr Char* dyldPath = "/usr/lib/dyld"; - std::vector dylinkerCmd(dylinkerCmdSize, 0); - dylinker_command* dylinker = 
reinterpret_cast(dylinkerCmd.data()); - dylinker->cmd = LC_LOAD_DYLINKER; - dylinker->cmdsize = dylinkerCmdSize; - dylinker->name.offset = sizeof(dylinker_command); - std::memcpy(dylinkerCmd.data() + sizeof(dylinker_command), dyldPath, strlen(dyldPath) + 1); - - output_fc.write(dylinkerCmd.data(), dylinkerCmd.size()); - - if (kVerbose) { - kConsoleOut << "Wrote LC_LOAD_DYLINKER: " << dyldPath << "\n"; - } - - // Write LC_MAIN entry point command (executables only) - if (!kIsDylib) { - entryCommand.cmd = LC_MAIN; - entryCommand.cmdsize = sizeof(entry_point_command); - // entryoff is relative to __TEXT segment file offset - entryCommand.entryoff = textFileOffset + entryCommand.entryoff; - - output_fc.write(reinterpret_cast(&entryCommand), sizeof(entryCommand)); - - if (kVerbose) { - kConsoleOut << "Wrote LC_MAIN, entryoff: 0x" << std::hex << entryCommand.entryoff << std::dec - << ", stacksize: " << entryCommand.stacksize << "\n"; - } - } - - // Write LC_UUID command - uuid_command uuidCmd{}; - uuidCmd.cmd = LC_UUID; - uuidCmd.cmdsize = sizeof(uuid_command); - - // Generate a random UUID (version 4) - std::random_device rd; - std::mt19937 gen(rd()); - uuids::uuid_random_generator uuidGen(gen); - uuids::uuid generatedUuid = uuidGen(); - auto uuidBytes = generatedUuid.as_bytes(); - std::memcpy(uuidCmd.uuid, uuidBytes.data(), 16); - - output_fc.write(reinterpret_cast(&uuidCmd), sizeof(uuidCmd)); - - if (kVerbose) { - kConsoleOut << "Wrote LC_UUID\n"; - } - - // Write LC_SYMTAB command - symtab_command symtabCmd{}; - symtabCmd.cmd = LC_SYMTAB; - symtabCmd.cmdsize = sizeof(symtab_command); - symtabCmd.symoff = static_cast(symtabFileOffset); - symtabCmd.nsyms = static_cast(kSymbolTable.size()); - symtabCmd.stroff = static_cast(strtabFileOffset); - symtabCmd.strsize = static_cast(kStringTable.size()); - - output_fc.write(reinterpret_cast(&symtabCmd), sizeof(symtabCmd)); - - if (kVerbose) { - kConsoleOut << "Wrote LC_SYMTAB, nsyms: " << symtabCmd.nsyms - << ", strsize: " << 
symtabCmd.strsize << "\n"; - } - - // Write LC_DYSYMTAB command - dysymtab_command dysymtabCmd{}; - std::memset(&dysymtabCmd, 0, sizeof(dysymtabCmd)); - dysymtabCmd.cmd = LC_DYSYMTAB; - dysymtabCmd.cmdsize = sizeof(dysymtab_command); - - // All symbols are local for now - dysymtabCmd.ilocalsym = 0; - dysymtabCmd.nlocalsym = static_cast(kSymbolTable.size()); - - // External symbols start after locals - dysymtabCmd.iextdefsym = static_cast(kSymbolTable.size()); - dysymtabCmd.nextdefsym = 0; - - // Undefined symbols - dysymtabCmd.iundefsym = static_cast(kSymbolTable.size()); - dysymtabCmd.nundefsym = 0; - - output_fc.write(reinterpret_cast(&dysymtabCmd), sizeof(dysymtabCmd)); - - if (kVerbose) { - kConsoleOut << "Wrote LC_DYSYMTAB\n"; - } - - // Pad to text section offset - UInt64 currentPos = output_fc.tellp(); - UInt64 padding = textFileOffset - currentPos; - - if (padding > 0) { - std::vector zeros(padding, 0); - output_fc.write(zeros.data(), zeros.size()); - } - - // Write __text content - for (auto& blob : kTextBytes) { - output_fc.write(blob.mBlob.data(), blob.mBlob.size()); - } - - // Pad to data section offset - currentPos = output_fc.tellp(); - padding = dataFileOffset - currentPos; - - if (padding > 0) { - std::vector zeros(padding, 0); - output_fc.write(zeros.data(), zeros.size()); - } - - // Write __data content - for (auto& blob : kDataBytes) { - output_fc.write(blob.mBlob.data(), blob.mBlob.size()); - } - - // Pad to symbol table offset - currentPos = output_fc.tellp(); - padding = symtabFileOffset - currentPos; - - if (padding > 0) { - std::vector zeros(padding, 0); - output_fc.write(zeros.data(), zeros.size()); - } - - // Write symbol table (nlist_64 entries) - for (auto& sym : kSymbolTable) { - output_fc.write(reinterpret_cast(&sym), sizeof(nlist_64)); - } - - if (kVerbose) { - kConsoleOut << "Wrote symbol table, " << kSymbolTable.size() << " entries\n"; - } - - // Write string table - output_fc.write(kStringTable.data(), kStringTable.size()); - - if 
(kVerbose) { - kConsoleOut << "Wrote string table, " << kStringTable.size() << " bytes\n"; - } - - output_fc.flush(); - - if (kVerbose) { - kConsoleOut << "Wrote Mach-O binary: " << kOutput << "\n"; - } - - return NECTAR_SUCCESS; -} - -// Last rev - 2026 - -#endif // ifdef CK_USE_MACHO_LINKER diff --git a/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cpp b/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cpp new file mode 100644 index 0000000..dcbd7e7 --- /dev/null +++ b/src/CompilerKit/src/Linkers/DynamicLinker64+MachO.cpp @@ -0,0 +1,705 @@ +// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// @author Amlal El Mahrouss (amlal@nekernel.org) +/// @brief NeKernel.org 64-bit Mach-O Linker. +/// Last Rev: 2026 +/// @note Outputs Mach-O executables with __TEXT and __DATA segments. + +#ifdef CK_USE_MACHO_LINKER + +#include +#include +#include +#include +#include +#include +#include +#include + +#define kLatestOSX (15) + +#define kMachODefaultEntrypoint "_main" +#define kMachODefaultOutput {"a" kMachOExt} + +#define kLinkerVersionStr "Nectar 64-Bit Linker (OS X Mach-O)" + +#define kLinkerSplash() kStdOut << kLinkerVersionStr << kStdEndl + +#define kConsoleOut \ + (std::cout << "\e[0;31m" \ + << "mld64: " \ + << "\e[0;97m") + +static CompilerKit::STLString kOutput = kMachODefaultOutput; +static cpu_type_t kCpuType = CPU_TYPE_X86_64; +static cpu_subtype_t kCpuSubType = CPU_SUBTYPE_X86_64_ALL; +static bool kFatBinaryEnable = false; +static bool kStartFound = false; +static bool kDuplicateSymbols = false; +static bool kIsDylib = false; +static Int64 kMachODefaultStackSz = 0; + +static CompilerKit::STLString kLinkerStart = kMachODefaultEntrypoint; + +/* object code and list. 
*/
+static std::vector<CompilerKit::STLString> kObjectList;
+// NOTE(review): the element type of the two blob lists below is elided in this copy of the
+// patch (it is an aggregate with `mBlob` / `mOffset` members) -- restore it from the tree.
+static std::vector kTextBytes;
+static std::vector kDataBytes;
+
+/* @brief symbol tables */
+static std::vector<nlist_64> kSymbolTable;                    // nlist_64 entries, in insertion order
+static std::vector<Char> kStringTable;                        // NUL-separated names; byte 0 is ""
+static std::map<CompilerKit::STLString, UInt64> kSymbolOffsets;  // symbol name -> recorded value
+
+/// @brief Structure to hold section information from AE records
+/// @note Every member has a default so a partially filled instance never carries
+/// indeterminate values when it is copied into a SectionInfoVec.
+struct SectionInfo {
+  CompilerKit::STLString name{};
+  UInt32 kind{0};
+  std::vector<Char> bytes{};  // NOTE(review): element type assumed to be Char -- confirm
+  UInt64 address{0};
+  UInt64 size{0};
+};
+
+using SectionInfoVec = std::vector<SectionInfo>;
+
+/// @brief Extract clean symbol name from AE record name
+/// AE format: ".code64$symbolname" or "symbolname.code64"
+/// @param aeName record name as stored in the AE object.
+/// @return aeName with every ".code64", ".data64", ".zero64" and "$" occurrence removed and
+/// leading/trailing spaces and tabs trimmed; may be empty.
+static CompilerKit::STLString macho_extract_symbol_name(const CompilerKit::STLString& aeName) {
+  CompilerKit::STLString name = aeName;
+
+  // Remove section prefixes/suffixes
+  static constexpr const Char* kSectionMarkers[] = {".code64", ".data64", ".zero64", "$"};
+
+  for (const Char* marker : kSectionMarkers) {
+    const std::size_t markerLen = strlen(marker);
+
+    // Restart the search from the beginning each time: an erase can join two fragments
+    // into a new occurrence that begins before the erased position.
+    for (auto pos = name.find(marker); pos != CompilerKit::STLString::npos;
+         pos = name.find(marker)) {
+      name.erase(pos, markerLen);
+    }
+  }
+
+  // Trim whitespace
+  const auto first = name.find_first_not_of(" \t");
+
+  if (first == CompilerKit::STLString::npos) return CompilerKit::STLString{};
+
+  const auto last = name.find_last_not_of(" \t");
+
+  return name.substr(first, last - first + 1);
+}
+
+/// @brief Add a symbol to the symbol table
+/// @param name symbol name, appended NUL-terminated to kStringTable.
+/// @param type value stored in nlist_64::n_type.
+/// @param sect value stored in nlist_64::n_sect.
+/// @param value value stored in nlist_64::n_value and recorded in kSymbolOffsets.
+/// @return index of the new entry inside kSymbolTable.
+/// @note A later symbol with the same name overwrites the value kept in kSymbolOffsets;
+/// both entries remain in kSymbolTable.
+static UInt32 macho_add_symbol(const CompilerKit::STLString& name, uint8_t type, uint8_t sect,
+                               UInt64 value) {
+  // Add name to string table (offset 0 is reserved for empty string)
+  if (kStringTable.empty()) {
+    kStringTable.push_back('\0');  // First byte is null
+  }
+
+  const UInt32 strOffset = static_cast<UInt32>(kStringTable.size());
+
+  kStringTable.insert(kStringTable.end(), name.begin(), name.end());
+  kStringTable.push_back('\0');
+
+  // Create nlist_64 entry
+  nlist_64 sym{};
+  sym.n_un.n_strx = strOffset;
+  sym.n_type = type;
+  sym.n_sect = sect;
+  sym.n_desc = 0;
+  sym.n_value = value;
+
+  kSymbolTable.push_back(sym);
+  kSymbolOffsets[name] = value;
+
+  return static_cast<UInt32>(kSymbolTable.size() - 1);
+}
+
+/// @brief Nectar 64-bit
Mach-O Linker.
+/// @note This linker outputs Mach-O executables for macOS/iOS.
+/// @details Parses the command line, collects symbols and code/data blobs from the AE object
+/// files, then writes the Mach-O header, the load commands and the segment payloads to kOutput.
+/// @return NECTAR_SUCCESS on success (also after -help / -version), EXIT_FAILURE on an unknown
+/// option, NECTAR_FILE_NOT_FOUND when the output file cannot be opened, NECTAR_EXEC_ERROR for
+/// every other failure.
+NECTAR_MODULE(DynamicLinker64MachO) {
+  CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler);
+
+  /**
+   * @brief parse flags and trigger options.
+   */
+  for (size_t linker_arg{1}; linker_arg < argc; ++linker_arg) {
+    if (std::strcmp(argv[linker_arg], "-help") == 0) {
+      kLinkerSplash();
+
+      kConsoleOut << "-version: Show linker version.\n";
+      kConsoleOut << "-help: Show linker help.\n";
+      kConsoleOut << "-verbose: Enable linker trace.\n";
+      kConsoleOut << "-fdylib: Output as a Dynamic Library.\n";
+      kConsoleOut << "-ffat: Output as a FAT binary.\n";
+      kConsoleOut << "-famd64: Output as an x86_64 Mach-O.\n";
+      kConsoleOut << "-farm64: Output as an ARM64 Mach-O.\n";
+      kConsoleOut << "-output: Select the output file name.\n";
+      kConsoleOut << "-fstart: Specify entry point symbol.\n";
+
+      return NECTAR_SUCCESS;
+    } else if (std::strcmp(argv[linker_arg], "-version") == 0) {
+      kLinkerSplash();
+
+      return NECTAR_SUCCESS;
+    } else if (std::strcmp(argv[linker_arg], "-ffat") == 0) {
+      kFatBinaryEnable = true;
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-famd64") == 0) {
+      kCpuType = CPU_TYPE_X86_64;
+      kCpuSubType = CPU_SUBTYPE_X86_64_ALL;
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-farm64") == 0) {
+      kCpuType = CPU_TYPE_ARM64;
+      kCpuSubType = CPU_SUBTYPE_ARM64_ALL;
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-fstart") == 0) {
+      // The value is optional: skip the flag when nothing (or another flag) follows it.
+      if ((linker_arg + 1) >= argc || argv[linker_arg + 1] == nullptr ||
+          argv[linker_arg + 1][0] == '-')
+        continue;
+
+      kLinkerStart = argv[linker_arg + 1];
+      linker_arg += 1;
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-verbose") == 0) {
+      kVerbose = true;
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-fdylib") == 0) {
+      kIsDylib = true;
+
+      // Swap the executable extension of the current output name for the dylib one.
+      if (kOutput.find(kMachOExt) != CompilerKit::STLString::npos) {
+        kOutput.erase(kOutput.find(kMachOExt), strlen(kMachOExt));
+        kOutput += kMachODylibExt;
+      }
+
+      continue;
+    } else if (std::strcmp(argv[linker_arg], "-output") == 0) {
+      // `>=`, not `>`: when -output is the last argument, argv[linker_arg + 1] is the
+      // terminating null pointer and must not be turned into a string.
+      if ((linker_arg + 1) >= argc || argv[linker_arg + 1] == nullptr) continue;
+
+      kOutput = argv[linker_arg + 1];
+      ++linker_arg;
+
+      continue;
+    } else {
+      if (argv[linker_arg][0] == '-') {
+        kConsoleOut << "unknown option: " << argv[linker_arg] << "\n";
+        return EXIT_FAILURE;
+      }
+
+      // Anything that is not a flag is an input object file.
+      kObjectList.emplace_back(argv[linker_arg]);
+
+      continue;
+    }
+  }
+
+  if (kOutput.empty()) {
+    kConsoleOut << "no output filename set." << std::endl;
+    return NECTAR_EXEC_ERROR;
+  } else if (kObjectList.empty()) {
+    kConsoleOut << "no input files." << std::endl;
+    return NECTAR_EXEC_ERROR;
+  } else {
+    namespace FS = std::filesystem;
+
+    // check for existing files, if they don't throw an error.
+    for (auto& obj : kObjectList) {
+      if (!FS::exists(obj)) {
+        kConsoleOut << "no such file: " << obj << std::endl;
+        return NECTAR_EXEC_ERROR;
+      }
+    }
+  }
+
+  SectionInfoVec sections;
+  CompilerKit::Utils::AEReadableProtocol reader_protocol{};
+
+  entry_point_command entryCommand{};
+  entryCommand.stacksize = kMachODefaultStackSz;
+
+  // Collect all text and data from AE object files
+  for (const auto& objectFile : kObjectList) {
+    if (!std::filesystem::exists(objectFile)) continue;
+
+    CompilerKit::AEHeader hdr{};
+
+    reader_protocol.fFilePtr = std::ifstream(objectFile, std::ifstream::binary);
+    reader_protocol.fFilePtr >> hdr;
+
+    if (hdr.fMagic[0] == kAEMag0 && hdr.fMagic[1] == kAEMag1 &&
+        hdr.fSize == sizeof(CompilerKit::AEHeader) && hdr.fMagic[2] == kAEMag2) {
+      std::size_t cnt = hdr.fCount;
+
+      if (kVerbose) kConsoleOut << "header found, record count: " << cnt << "\n";
+
+      // Zero-filled backing store for the record table. A vector releases itself on every
+      // exit path; the former `new[]` could never return null, so its check was dead code.
+      std::vector<Char> raw_ae_records(cnt * sizeof(CompilerKit::AERecordHeader), 0);
+
+      auto* ae_records = reader_protocol.Read(raw_ae_records.data(), cnt);
+
+      // `sections` accumulates across objects; remember where this object's entries start.
+      const std::size_t firstSection = sections.size();
+
+      for (size_t ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) {
+        SectionInfo section;
+        section.name = ae_records[ae_record_index].fName;
+        section.kind = ae_records[ae_record_index].fKind;
+        section.size = ae_records[ae_record_index].fSize;
+
+        // Extract clean symbol name and add to symbol table
+        CompilerKit::STLString symbolName = macho_extract_symbol_name(section.name);
+
+        if (!symbolName.empty()) {
+          // Determine section number (1 = __text, 2 = __data)
+          uint8_t sectNum = 0;
+          if (section.kind & CompilerKit::kPefCode) {
+            sectNum = 1;  // __text section
+          } else if (section.kind & CompilerKit::kPefData) {
+            sectNum = 2;  // __data section
+          } else if (section.kind & CompilerKit::kPefZero) {
+            sectNum = 3;  // __bss section
+          }
+
+          // N_EXT = external, N_SECT = defined in section
+          uint8_t symType = N_EXT | N_SECT;
+
+          macho_add_symbol(symbolName, symType, sectNum, ae_records[ae_record_index].fOffset);
+
+          if (kVerbose) {
+            kConsoleOut << "added symbol: " << symbolName
+                        << " at offset: " << ae_records[ae_record_index].fOffset << "\n";
+          }
+        }
+
+        sections.push_back(section);
+      }
+
+      // Look up entry point from symbol table
+      auto entryIt = kSymbolOffsets.find(kLinkerStart);
+      if (entryIt != kSymbolOffsets.end()) {
+        entryCommand.entryoff = entryIt->second;
+        kStartFound = true;
+
+        if (kVerbose) {
+          kConsoleOut << "found entry point " << kLinkerStart << " at offset: " << entryIt->second
+                      << "\n";
+        }
+      }
+
+      // Read the actual code bytes
+      std::vector<Char> bytes;
+      bytes.resize(hdr.fCodeSize);
+
+      reader_protocol.fFilePtr.seekg(std::streamsize(hdr.fStartCode));
+      reader_protocol.fFilePtr.read(bytes.data(), std::streamsize(hdr.fCodeSize));
+
+      // Separate code and data based on section kind. Only the sections of the current
+      // object are visited; walking the whole vector would push this blob again for every
+      // section of the objects processed before it.
+      for (std::size_t index = firstSection; index < sections.size(); ++index) {
+        const auto& section = sections[index];
+
+        if (section.kind == CompilerKit::kPefCode) {
+          kTextBytes.push_back({.mBlob = bytes, .mOffset = 0});
+        } else if (section.kind == CompilerKit::kPefData) {
+          kDataBytes.push_back({.mBlob = bytes, .mOffset = 0});
+        }
+      }
+
+      reader_protocol.fFilePtr.close();
+      continue;
+    }
+
+    kConsoleOut << "not an object container: " << objectFile << std::endl;
+    return NECTAR_EXEC_ERROR;
+  }
+
+  // Check for entry point in executables
+  // NOTE(review): this only prints a diagnostic; linking continues with entryoff left at 0.
+  if (!kStartFound && !kIsDylib) {
+    kConsoleOut << "undefined entrypoint " << kLinkerStart << " for executable: " << kOutput
+                << "\n";
+  }
+
+  // Calculate sizes
+  UInt64 textSize = 0;
+  UInt64 dataSize = 0;
+
+  for (auto& blob : kTextBytes) {
+    textSize += blob.mBlob.size();
+  }
+
+  for (auto& blob : kDataBytes) {
+    dataSize += blob.mBlob.size();
+  }
+
+  // Open output file
+  std::ofstream output_fc(kOutput, std::ofstream::binary);
+
+  // A failed open sets failbit (not badbit), so test is_open() rather than bad().
+  if (!output_fc.is_open()) {
+    kConsoleOut << "error: " << strerror(errno) << "\n";
+    return NECTAR_FILE_NOT_FOUND;
+  }
+
+  using namespace CompilerKit::MachO;
+
+  UInt32 numCommands = 8;  // __PAGEZERO, LC_BUILD_VERSION, __TEXT, __LINKEDIT, LC_LOAD_DYLINKER,
+                           // LC_UUID, LC_SYMTAB, LC_DYSYMTAB
+
+  if (!kIsDylib) {
+    numCommands += 1;  // LC_MAIN
+  }
+
+  UInt32 dataSegCmdSize =
+      kDataBytes.size() > 0 ? sizeof(segment_command_64) + sizeof(section_64) : 0;
+
+  if (dataSegCmdSize > 0) ++numCommands;  // __DATA segment
+
+  // Path stored in LC_LOAD_DYLINKER; also drives the size of that command.
+  constexpr const Char* dyldPath = "/usr/lib/dyld";
+
+  UInt32 sizeOfCmds = 0;
+  UInt32 headerSize = sizeof(mach_header_64);
+  UInt32 pageZeroSize = sizeof(segment_command_64);
+  UInt32 textSegCmdSize = sizeof(segment_command_64) + sizeof(section_64);
+  UInt32 buildCmdSize = sizeof(build_version_command);
+  UInt32 mainCmdSize = sizeof(entry_point_command);
+  UInt32 uuidCmdSize = sizeof(uuid_command);
+  UInt32 symtabCmdSize = sizeof(symtab_command);
+  UInt32 dysymtabCmdSize = sizeof(dysymtab_command);
+  UInt32 linkeditCmdSize = sizeof(segment_command_64);  // No sections
+  // Fixed part + path + NUL, rounded up to a multiple of 8 bytes.
+  UInt32 dylinkerCmdSize = static_cast<UInt32>(
+      (sizeof(dylinker_command) + strlen(dyldPath) + 1 + 7) & ~static_cast<std::size_t>(7));
+
+  sizeOfCmds = pageZeroSize + textSegCmdSize + dataSegCmdSize + buildCmdSize + uuidCmdSize +
+               symtabCmdSize + dysymtabCmdSize + linkeditCmdSize + dylinkerCmdSize;
+
+  if (!kIsDylib) sizeOfCmds += mainCmdSize;
+
+  UInt64 headerAndCmdsSize = headerSize + sizeOfCmds;
+  UInt64 textFileOffset = AlignToPage(headerAndCmdsSize);
+  UInt64 textVMAddr = kDefaultBaseAddress;
+  UInt64 textSegmentSize = AlignToPage(textSize > 0 ? textSize : kPageSize);
+  UInt64 textVMSize = textFileOffset + textSegmentSize;  // __TEXT includes header
+
+  UInt64 dataFileOffset = textFileOffset + textSegmentSize;
+  UInt64 dataVMAddr = textVMAddr + textVMSize;
+  UInt64 dataSegmentSize = dataSize > 0 ? AlignToPage(dataSize) : 0;  // 0 if no data
+
+  // __LINKEDIT segment comes after data segment (or __TEXT if no data)
+  UInt64 linkeditFileOffset =
+      dataSegmentSize > 0 ? dataFileOffset + dataSegmentSize : textFileOffset + textSegmentSize;
+  UInt64 linkeditVMAddr =
+      dataSegmentSize > 0 ? dataVMAddr + dataSegmentSize : textVMAddr + textVMSize;
+  UInt64 symtabFileOffset = linkeditFileOffset;
+  UInt64 strtabFileOffset = symtabFileOffset + (kSymbolTable.size() * sizeof(nlist_64));
+  UInt64 linkeditFileSize = (kSymbolTable.size() * sizeof(nlist_64)) + kStringTable.size();
+  UInt64 linkeditVMSize = AlignToPage(linkeditFileSize > 0 ? linkeditFileSize : 1);
+
+  // Zero-fills the output up to `target`. Does nothing when the stream is already at or
+  // past that offset, or when tellp() reports failure, instead of letting the unsigned
+  // subtraction wrap into a huge allocation.
+  auto pad_to = [&output_fc](UInt64 target) {
+    const auto pos = output_fc.tellp();
+
+    if (pos == std::ofstream::pos_type(-1)) return;
+
+    const UInt64 current = static_cast<UInt64>(static_cast<std::streamoff>(pos));
+
+    if (target <= current) return;
+
+    std::vector<Char> zeros(target - current, 0);
+    output_fc.write(zeros.data(), zeros.size());
+  };
+
+  // Write Mach-O header
+  mach_header_64 header{};
+
+  header.magic = MH_MAGIC_64;
+  header.cputype = kCpuType;
+  header.cpusubtype = kCpuSubType;
+  header.filetype = kIsDylib ? MH_DYLIB : MH_EXECUTE;
+  header.ncmds = numCommands;
+  header.sizeofcmds = sizeOfCmds;
+  header.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE;
+  header.reserved = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&header), sizeof(header));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote Mach-O header, ncmds: " << numCommands << "\n";
+  }
+
+  // Write __PAGEZERO segment command (no file content, no access rights)
+  segment_command_64 pageZeroSegment{};
+  pageZeroSegment.cmd = LC_SEGMENT_64;
+  pageZeroSegment.cmdsize = sizeof(segment_command_64);
+  CopySegmentName(pageZeroSegment.segname, kSegmentPageZero);
+  pageZeroSegment.vmaddr = 0;
+  pageZeroSegment.vmsize = 0x100000000;
+  pageZeroSegment.fileoff = 0;
+  pageZeroSegment.filesize = 0;
+  pageZeroSegment.maxprot = 0;
+  pageZeroSegment.initprot = 0;
+  pageZeroSegment.nsects = 0;
+  pageZeroSegment.flags = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&pageZeroSegment), sizeof(pageZeroSegment));
+
+  build_version_command build = {.cmd = LC_BUILD_VERSION,
+                                 .cmdsize = sizeof(build_version_command),
+                                 .platform = PLATFORM_MACOS,
+                                 .minos = (kLatestOSX << 16),  // major version kLatestOSX, ".0"
+                                 .sdk = (kLatestOSX << 16),    // major version kLatestOSX, ".0"
+                                 .ntools = 0};
+
+  output_fc.write(reinterpret_cast<const Char*>(&build), sizeof(build));
+
+  if (kVerbose) {
+    // Report the version that was actually encoded instead of a hard-coded "11.0".
+    kConsoleOut << "Wrote LC_BUILD_VERSION, platform: macOS, minos: " << kLatestOSX
+                << ".0, sdk: " << kLatestOSX << ".0\n";
+  }
+
+  // Write __TEXT segment command
+  segment_command_64 textSegment{};
+  textSegment.cmd = LC_SEGMENT_64;
+  textSegment.cmdsize = sizeof(segment_command_64) + sizeof(section_64);  // 1 section
+  CopySegmentName(textSegment.segname, kSegmentText);
+  textSegment.vmaddr = textVMAddr;
+  textSegment.vmsize = textVMSize;  // Header + code (page-aligned)
+  textSegment.fileoff = 0;          // Must include Mach-O header
+  textSegment.filesize =
+      dataSegmentSize > 0 ? dataFileOffset : linkeditFileOffset;  // Extend to next segment
+  textSegment.maxprot = VM_PROT_READ | VM_PROT_EXECUTE;
+  textSegment.initprot = VM_PROT_READ | VM_PROT_EXECUTE;
+  textSegment.nsects = 1;
+  textSegment.flags = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&textSegment), sizeof(textSegment));
+
+  // Write __text section header
+  section_64 textSection{};
+  CopySegmentName(textSection.sectname, kSectionText);
+  CopySegmentName(textSection.segname, kSegmentText);
+  textSection.addr = textVMAddr + textFileOffset;  // Section is at offset within segment
+  textSection.size = textSize;
+  textSection.offset = static_cast<UInt32>(textFileOffset);
+  textSection.align = kSectionAlign;
+  textSection.reloff = 0;
+  textSection.nreloc = 0;
+  textSection.flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
+  textSection.reserved1 = 0;
+  textSection.reserved2 = 0;
+  textSection.reserved3 = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&textSection), sizeof(textSection));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote __TEXT segment, vmaddr: 0x" << std::hex << textVMAddr << std::dec << "\n";
+    kConsoleOut << "  __text section, size: " << textSize << " bytes\n";
+  }
+
+  // Write __DATA segment command (emitted only when at least one data blob exists)
+  segment_command_64 dataSegment{};
+  dataSegment.cmd = LC_SEGMENT_64;
+  dataSegment.cmdsize = sizeof(segment_command_64) + sizeof(section_64);  // 1 section
+  CopySegmentName(dataSegment.segname, kSegmentData);
+  dataSegment.vmaddr = dataVMAddr;
+  dataSegment.vmsize = dataSegmentSize;
+  dataSegment.fileoff = dataFileOffset;
+  dataSegment.filesize = dataSize;
+  dataSegment.maxprot = VM_PROT_READ | VM_PROT_WRITE;
+  dataSegment.initprot = VM_PROT_READ | VM_PROT_WRITE;
+  dataSegment.nsects = 1;
+  dataSegment.flags = 0;
+
+  if (dataSegCmdSize > 0)
+    output_fc.write(reinterpret_cast<const Char*>(&dataSegment), sizeof(dataSegment));
+
+  // Write __data section header
+  section_64 dataSection{};
+  CopySegmentName(dataSection.sectname, kSectionData);
+  CopySegmentName(dataSection.segname, kSegmentData);
+  dataSection.addr = dataVMAddr;
+  dataSection.size = dataSize;
+  dataSection.offset = static_cast<UInt32>(dataFileOffset);
+  dataSection.align = kSectionAlign;
+  dataSection.reloff = 0;
+  dataSection.nreloc = 0;
+  dataSection.flags = 0;
+  dataSection.reserved1 = 0;
+  dataSection.reserved2 = 0;
+  dataSection.reserved3 = 0;
+
+  if (dataSegCmdSize > 0)
+    output_fc.write(reinterpret_cast<const Char*>(&dataSection), sizeof(dataSection));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote __DATA segment, vmaddr: 0x" << std::hex << dataVMAddr << std::dec << "\n";
+    kConsoleOut << "  __data section, size: " << dataSize << " bytes\n";
+  }
+
+  // Write __LINKEDIT segment command (contains symbol/string tables)
+  segment_command_64 linkeditSegment{};
+  linkeditSegment.cmd = LC_SEGMENT_64;
+  linkeditSegment.cmdsize = sizeof(segment_command_64);  // No sections
+  CopySegmentName(linkeditSegment.segname, "__LINKEDIT");
+  linkeditSegment.vmaddr = linkeditVMAddr;
+  linkeditSegment.vmsize = linkeditVMSize;
+  linkeditSegment.fileoff = linkeditFileOffset;
+  linkeditSegment.filesize = linkeditFileSize;
+  linkeditSegment.maxprot = VM_PROT_READ;
+  linkeditSegment.initprot = VM_PROT_READ;
+  linkeditSegment.nsects = 0;
+  linkeditSegment.flags = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&linkeditSegment), sizeof(linkeditSegment));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote __LINKEDIT segment, vmaddr: 0x" << std::hex << linkeditVMAddr << std::dec
+                << ", fileoff: " << linkeditFileOffset << ", filesize: " << linkeditFileSize
+                << "\n";
+  }
+
+  // Write LC_LOAD_DYLINKER command: fixed header followed by the NUL-terminated path,
+  // zero-padded up to dylinkerCmdSize.
+  std::vector<Char> dylinkerCmd(dylinkerCmdSize, 0);
+  dylinker_command* dylinker = reinterpret_cast<dylinker_command*>(dylinkerCmd.data());
+  dylinker->cmd = LC_LOAD_DYLINKER;
+  dylinker->cmdsize = dylinkerCmdSize;
+  dylinker->name.offset = sizeof(dylinker_command);
+  std::memcpy(dylinkerCmd.data() + sizeof(dylinker_command), dyldPath, strlen(dyldPath) + 1);
+
+  output_fc.write(dylinkerCmd.data(), dylinkerCmd.size());
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote LC_LOAD_DYLINKER: " << dyldPath << "\n";
+  }
+
+  // Write LC_MAIN entry point command (executables only)
+  if (!kIsDylib) {
+    entryCommand.cmd = LC_MAIN;
+    entryCommand.cmdsize = sizeof(entry_point_command);
+    // entryoff is relative to __TEXT segment file offset
+    entryCommand.entryoff = textFileOffset + entryCommand.entryoff;
+
+    output_fc.write(reinterpret_cast<const Char*>(&entryCommand), sizeof(entryCommand));
+
+    if (kVerbose) {
+      kConsoleOut << "Wrote LC_MAIN, entryoff: 0x" << std::hex << entryCommand.entryoff << std::dec
+                  << ", stacksize: " << entryCommand.stacksize << "\n";
+    }
+  }
+
+  // Write LC_UUID command
+  uuid_command uuidCmd{};
+  uuidCmd.cmd = LC_UUID;
+  uuidCmd.cmdsize = sizeof(uuid_command);
+
+  // Generate a random UUID (version 4)
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  uuids::uuid_random_generator uuidGen(gen);
+  uuids::uuid generatedUuid = uuidGen();
+  auto uuidBytes = generatedUuid.as_bytes();
+  std::memcpy(uuidCmd.uuid, uuidBytes.data(), 16);
+
+  output_fc.write(reinterpret_cast<const Char*>(&uuidCmd), sizeof(uuidCmd));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote LC_UUID\n";
+  }
+
+  // Write LC_SYMTAB command
+  symtab_command symtabCmd{};
+  symtabCmd.cmd = LC_SYMTAB;
+  symtabCmd.cmdsize = sizeof(symtab_command);
+  symtabCmd.symoff = static_cast<UInt32>(symtabFileOffset);
+  symtabCmd.nsyms = static_cast<UInt32>(kSymbolTable.size());
+  symtabCmd.stroff = static_cast<UInt32>(strtabFileOffset);
+  symtabCmd.strsize = static_cast<UInt32>(kStringTable.size());
+
+  output_fc.write(reinterpret_cast<const Char*>(&symtabCmd), sizeof(symtabCmd));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote LC_SYMTAB, nsyms: " << symtabCmd.nsyms
+                << ", strsize: " << symtabCmd.strsize << "\n";
+  }
+
+  // Write LC_DYSYMTAB command
+  dysymtab_command dysymtabCmd{};
+  std::memset(&dysymtabCmd, 0, sizeof(dysymtabCmd));
+  dysymtabCmd.cmd = LC_DYSYMTAB;
+  dysymtabCmd.cmdsize = sizeof(dysymtab_command);
+
+  // All symbols are local for now
+  // NOTE(review): the symbols collected above are all typed N_EXT | N_SECT, which does not
+  // match a "local" range -- confirm whether this range or the symbol type should change.
+  dysymtabCmd.ilocalsym = 0;
+  dysymtabCmd.nlocalsym = static_cast<UInt32>(kSymbolTable.size());
+
+  // External symbols start after locals
+  dysymtabCmd.iextdefsym = static_cast<UInt32>(kSymbolTable.size());
+  dysymtabCmd.nextdefsym = 0;
+
+  // Undefined symbols
+  dysymtabCmd.iundefsym = static_cast<UInt32>(kSymbolTable.size());
+  dysymtabCmd.nundefsym = 0;
+
+  output_fc.write(reinterpret_cast<const Char*>(&dysymtabCmd), sizeof(dysymtabCmd));
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote LC_DYSYMTAB\n";
+  }
+
+  // Pad to text section offset
+  pad_to(textFileOffset);
+
+  // Write __text content
+  for (auto& blob : kTextBytes) {
+    output_fc.write(blob.mBlob.data(), blob.mBlob.size());
+  }
+
+  // Pad to data section offset
+  pad_to(dataFileOffset);
+
+  // Write __data content
+  for (auto& blob : kDataBytes) {
+    output_fc.write(blob.mBlob.data(), blob.mBlob.size());
+  }
+
+  // Pad to symbol table offset
+  pad_to(symtabFileOffset);
+
+  // Write symbol table (nlist_64 entries)
+  for (auto& sym : kSymbolTable) {
+    output_fc.write(reinterpret_cast<const Char*>(&sym), sizeof(nlist_64));
+  }
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote symbol table, " << kSymbolTable.size() << " entries\n";
+  }
+
+  // Write string table
+  output_fc.write(kStringTable.data(), kStringTable.size());
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote string table, " << kStringTable.size() << " bytes\n";
+  }
+
+  output_fc.flush();
+
+  // Any failed write above leaves the stream in a failed state; do not report success then.
+  if (!output_fc) {
+    kConsoleOut << "error: could not write " << kOutput << "\n";
+    return NECTAR_EXEC_ERROR;
+  }
+
+  if (kVerbose) {
+    kConsoleOut << "Wrote Mach-O binary: " << kOutput << "\n";
+  }
+
+  return NECTAR_SUCCESS;
+}
+
+// Last rev - 2026
+
+#endif  // ifdef CK_USE_MACHO_LINKER
diff --git a/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cc b/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cc
deleted file mode 100644
index 6be6c58..0000000
--- a/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cc
+++ /dev/null
@@ -1,677 +0,0 @@
-// Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org)
-// Licensed under the Apache License, Version 2.0 (See accompanying
-// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0)
-// Official repository: https://github.com/nekernel-org/nectar
-
-/// @author Amlal El Mahrouss (amlal@nekernel.org)
-/// @brief NeKernel.org 64-bit PEF Linker.
-/// Last Rev: Sun Feb 8 CET 2026
-/// @note Do not look up for anything with .code64/.data64/.zero64!
-/// It will be loaded when the program loader will start the image.
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define kLinkerPefNoCpu (0U)
-#define kLinkerPefNoSubCpu (0U)
-#define kLinkerPefDefaultOutput {"a" kPefExt}
-
-#define kLinkerVersionStr "Nectar 64-Bit Linker (NeKernel PEF)"
-
-#define kLinkerDefaultOrigin kPefBaseOrigin
-
-#define kLinkerId (0x5046FF)
-
-#define kLinkerAbiContainer "__PEFContainer:ABI:"
-#define kLinkerGuidContainer "__PEFContainer:GUID:"
-#define kEndContainer "__PEFContainer:END:"
-
-#define kLinkerSplash() kStdOut << kLinkerVersionStr << kStdEndl
-
-/// @brief PEF stack size symbol.
-#define kLinkerStackSizeSymbol "__PEFSizeOfReserveStack"
-
-#define kConsoleOut \
-  (std::cout << "\e[0;31m" \
-             << "ld64: " \
-             << "\e[0;97m")
-
-enum struct ABIType : Int32 {
-  kABITypeNull = 0,
-  kABITypeStart = 0x1010, /* The start of ABI list.
*/ - kABITypeNE = 0x5046, /* PF (NeKernel.org's PEF ABI) */ - kABITypeEnd = kABITypeNull, - kABITypeInvalid = 0xFFFF, -}; - -static CompilerKit::STLString kOutput = kLinkerPefDefaultOutput; -static ABIType kAbi = ABIType::kABITypeNE; -static Int32 kSubArch = kLinkerPefNoSubCpu; -static Int32 kArch = CompilerKit::kPefArchInvalid; -static bool kFatBinaryEnable = false; -static bool kStartFound = false; -static bool kDuplicateSymbols = false; - -/* ld64 is to be found, mld is to be found at runtime. */ -static const Char* kLinkerDefineSymbol = ":UndefinedSymbol:"; -static const Char* kLinkerDynamicSym = ":RuntimeSymbol:"; - -static CompilerKit::STLString kLinkerStart = kPefStart; - -/* object code and list. */ -static std::vector kObjectList; -static std::vector kObjectBytes; - -/// @brief Nectar 64-bit Linker. -/// @note This linker is made for PEF executable, thus Nectar based OSes. -NECTAR_MODULE(DynamicLinker64PEF) { - bool is_executable = true; - - CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); - - /** - * @brief parse flags and trigger options. 
- */ - for (size_t linker_arg = 1; linker_arg < argc; ++linker_arg) { - if (std::strcmp(argv[linker_arg], "-help") == 0) { - kLinkerSplash(); - - kConsoleOut << "-version: Show linker version.\n"; - kConsoleOut << "-help: Show linker help.\n"; - kConsoleOut << "-verbose: Enable linker trace.\n"; - kConsoleOut << "-fdylib: Output as a Dynamic PEF.\n"; - kConsoleOut << "-ffat: Output as a FAT PEF.\n"; - kConsoleOut << "-f32k: Output as a 32x0 PEF.\n"; - kConsoleOut << "-f64k: Output as a 64x0 PEF.\n"; - kConsoleOut << "-famd64: Output as a AMD64 PEF.\n"; - kConsoleOut << "-frv64: Output as a RISC-V PEF.\n"; - kConsoleOut << "-fpower64: Output as a POWER PEF.\n"; - kConsoleOut << "-farm64: Output as a ARM64 PEF.\n"; - kConsoleOut << "-output: Select the output file name.\n"; - - return NECTAR_SUCCESS; - } else if (std::strcmp(argv[linker_arg], "-version") == 0) { - kLinkerSplash(); - - return NECTAR_SUCCESS; - } else if (std::strcmp(argv[linker_arg], "-ffat") == 0) { - kFatBinaryEnable = true; - - continue; - } else if (std::strcmp(argv[linker_arg], "-f64k") == 0) { - kArch = CompilerKit::kPefArch64000; - - continue; - } else if (std::strcmp(argv[linker_arg], "-famd64") == 0) { - kArch = CompilerKit::kPefArchAMD64; - - continue; - } else if (std::strcmp(argv[linker_arg], "-fstart") == 0) { - if (argv[linker_arg + 1] == nullptr || argv[linker_arg + 1][0] == '-') continue; - - kLinkerStart = argv[linker_arg + 1]; - linker_arg += 1; - - continue; - } else if (std::strcmp(argv[linker_arg], "-f32k") == 0) { - kArch = CompilerKit::kPefArch32000; - - continue; - } else if (std::strcmp(argv[linker_arg], "-fpower64") == 0) { - kArch = CompilerKit::kPefArchPowerPC; - - continue; - } else if (std::strcmp(argv[linker_arg], "-friscv64") == 0) { - kArch = CompilerKit::kPefArchRISCV; - - continue; - } else if (std::strcmp(argv[linker_arg], "-farm64") == 0) { - kArch = CompilerKit::kPefArchARM64; - - continue; - } else if (std::strcmp(argv[linker_arg], "-verbose") == 0) { - kVerbose 
= true; - - continue; - } else if (std::strcmp(argv[linker_arg], "-fdylib") == 0) { - if (kOutput.empty()) { - continue; - } - - if (kOutput.find(kPefExt) != CompilerKit::STLString::npos) - kOutput.erase(kOutput.find(kPefExt), strlen(kPefExt)); - - kOutput += kPefDylibExt; - - is_executable = false; - - continue; - } else if (std::strcmp(argv[linker_arg], "-output") == 0) { - if ((linker_arg + 1) > argc) continue; - - kOutput = argv[linker_arg + 1]; - ++linker_arg; - - continue; - } else { - if (argv[linker_arg][0] == '-') { - kConsoleOut << "unknown option: " << argv[linker_arg] << "\n"; - return EXIT_FAILURE; - } - - kObjectList.emplace_back(argv[linker_arg]); - - continue; - } - } - - if (kOutput.empty()) { - kConsoleOut << "no output filename set." << std::endl; - return NECTAR_EXEC_ERROR; - } else if (kObjectList.empty()) { - kConsoleOut << "no input files." << std::endl; - return NECTAR_EXEC_ERROR; - } else { - namespace FS = std::filesystem; - - // check for existing files, if they don't throw an error. - for (auto& obj : kObjectList) { - if (!FS::exists(obj)) { - // if filesystem doesn't find file - // -> throw error. - kConsoleOut << "no such file: " << obj << std::endl; - return NECTAR_EXEC_ERROR; - } - } - } - - // PEF expects a valid target architecture when outputing a binary. - if (kArch == CompilerKit::kPefArchInvalid) { - kConsoleOut << "no target architecture set, can't continue." << std::endl; - return NECTAR_EXEC_ERROR; - } - - CompilerKit::PEFContainer pef_container{}; - - int32_t archs = kArch; - - pef_container.Count = 0UL; - pef_container.Kind = is_executable ? CompilerKit::kPefKindExec : CompilerKit::kPefKindDylib; - pef_container.SubCpu = kSubArch; - pef_container.Linker = kLinkerId; // Nectar Linker - pef_container.Abi = static_cast(kAbi); // Multi-Processor UX ABI - pef_container.Magic[0] = kPefMagic[kFatBinaryEnable ? 2 : 0]; - pef_container.Magic[1] = kPefMagic[1]; - pef_container.Magic[2] = kPefMagic[kFatBinaryEnable ? 
0 : 2]; - pef_container.Magic[3] = kPefMagic[3]; - pef_container.Version = kPefVersion; - - // specify the start address, can be 0x10000 - pef_container.Start = kLinkerDefaultOrigin; - pef_container.HdrSz = sizeof(CompilerKit::PEFContainer); - pef_container.Checksum = 0UL; - - std::ofstream output_fc(kOutput, std::ofstream::binary); - - if (output_fc.bad()) { - if (kVerbose) { - kConsoleOut << "error: " << strerror(errno) << "\n"; - } - - return NECTAR_FILE_NOT_FOUND; - } - - //! Read AE to convert as PEF. - - std::vector command_headers; - CompilerKit::Utils::AEReadableProtocol reader_protocol{}; - - for (const auto& objectFile : kObjectList) { - if (!std::filesystem::exists(objectFile)) continue; - - CompilerKit::AEHeader hdr{}; - - reader_protocol.fFilePtr = std::ifstream(objectFile, std::ifstream::binary); - reader_protocol.fFilePtr >> hdr; - - if (hdr.fMagic[0] == kAEMag0 && hdr.fMagic[1] == kAEMag1 && - hdr.fSize == sizeof(CompilerKit::AEHeader) && hdr.fMagic[2] == kAEMag2) { - if (hdr.fArch != kArch && hdr.fVersion == kAEIdentVersion) { - if (kVerbose) kConsoleOut << "is this a FAT binary? : "; - - if (!kFatBinaryEnable) { - if (kVerbose) kConsoleOut << "not a FAT binary.\n"; - - kConsoleOut << "object " << objectFile - << " is a different kind of architecture and output isn't " - "treated as a FAT binary." - << std::endl; - - return NECTAR_FAT_ERROR; - } else { - if (kVerbose) { - kConsoleOut << "Architecture matches what we expect.\n"; - } - } - } - - // append arch type to archs varaible. 
- archs |= hdr.fArch; - std::size_t cnt = hdr.fCount; - - if (kVerbose) kConsoleOut << "header found, record count: " << cnt << "\n"; - - pef_container.Count = cnt; - - Char* raw_ae_records = new Char[cnt * sizeof(CompilerKit::AERecordHeader)]; - - if (!raw_ae_records) { - if (kVerbose) kConsoleOut << "allocation failed for records of count: " << cnt << "\n"; - } - - std::memset(raw_ae_records, 0, cnt * sizeof(CompilerKit::AERecordHeader)); - - auto* ae_records = reader_protocol.Read(raw_ae_records, cnt); - - size_t org = kLinkerDefaultOrigin; - - for (size_t ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) { - CompilerKit::PEFCommandHeader command_header{{}}; - std::size_t offset_of_obj = ae_records[ae_record_index].fOffset; - - std::memcpy(command_header.Name, ae_records[ae_record_index].fName, kPefNameLen); - - CompilerKit::STLString cmd_hdr_name(command_header.Name); - - // check this header if it's any valid. - if (cmd_hdr_name.find(kPefCode64) == CompilerKit::STLString::npos && - cmd_hdr_name.find(kPefData64) == CompilerKit::STLString::npos && - cmd_hdr_name.find(kPefZero64) == CompilerKit::STLString::npos) { - if (cmd_hdr_name.find(kLinkerStart) == CompilerKit::STLString::npos && - *command_header.Name == 0) { - if (cmd_hdr_name.find(kLinkerDefineSymbol) != CompilerKit::STLString::npos) { - goto ld_mark_header; - } else { - continue; - } - } - } - - if (cmd_hdr_name.find(kLinkerStart) != CompilerKit::STLString::npos && - cmd_hdr_name.find(kPefCode64) != CompilerKit::STLString::npos) { - kStartFound = true; - } - - ld_mark_header: - command_header.Offset = offset_of_obj; - command_header.Kind = ae_records[ae_record_index].fKind; - command_header.VirtualSize = ae_records[ae_record_index].fSize; - command_header.Cpu = hdr.fArch; - command_header.VirtualAddress = org; - command_header.SubCpu = hdr.fSubArch; - command_header.OffsetSize = ae_records[ae_record_index].fSize; - - org += command_header.VirtualSize; - - if (kVerbose) { - kConsoleOut << 
"Record: " << ae_records[ae_record_index].fName << " is marked.\n"; - kConsoleOut << "Offset: " << command_header.Offset << "\n"; - } - - command_headers.emplace_back(command_header); - } - - delete[] raw_ae_records; - raw_ae_records = nullptr; - - std::vector bytes; - bytes.resize(hdr.fCodeSize); - - reader_protocol.fFilePtr.seekg(std::streamsize(hdr.fStartCode)); - reader_protocol.fFilePtr.read(bytes.data(), std::streamsize(hdr.fCodeSize)); - - kObjectBytes.push_back({.mBlob = bytes, .mOffset = hdr.fStartCode}); - - // Blob was written, close fp. - - reader_protocol.fFilePtr.close(); - - continue; - } - - kConsoleOut << "not an object container: " << objectFile << std::endl; - - // don't continue, it is a fatal error. - return NECTAR_EXEC_ERROR; - } - - pef_container.Cpu = archs; - - output_fc << pef_container; - - if (kVerbose) { - kConsoleOut << "wrote container to: " << output_fc.tellp() << ".\n"; - } - - output_fc.seekp(std::streamsize(pef_container.HdrSz)); - - std::vector not_found; - std::vector symbols; - - // step 2: check for errors (multiple symbols, undefined ones) - - CompilerKit::STLString cmd_hdr_name; - - for (auto& command_hdr : command_headers) { - // check if this symbol needs to be resolved. - if (CompilerKit::STLString(command_hdr.Name).find(kLinkerDefineSymbol) != - CompilerKit::STLString::npos && - CompilerKit::STLString(command_hdr.Name).find(kLinkerDynamicSym) == - CompilerKit::STLString::npos) { - if (kVerbose) kConsoleOut << "Found undefined symbol: " << command_hdr.Name << "\n"; - - cmd_hdr_name = command_hdr.Name; - - if (auto it = std::find(not_found.begin(), not_found.end(), cmd_hdr_name); - it == not_found.end()) { - not_found.emplace_back(cmd_hdr_name); - } - } - - symbols.emplace_back(command_hdr.Name); - } - - // Now try to solve these symbols. 
- - for (size_t not_found_idx = 0; not_found_idx < command_headers.size(); ++not_found_idx) { - if (const auto it = std::find(not_found.begin(), not_found.end(), - CompilerKit::STLString(command_headers[not_found_idx].Name)); - it != not_found.end()) { - CompilerKit::STLString symbol_imp = *it; - - if (symbol_imp.find(kLinkerDefineSymbol) == CompilerKit::STLString::npos) continue; - - // erase the lookup prefix. - symbol_imp.erase(0, symbol_imp.find(kLinkerDefineSymbol) + strlen(kLinkerDefineSymbol)); - - // demangle everything. - while (symbol_imp.find('$') != CompilerKit::STLString::npos) - symbol_imp.erase(symbol_imp.find('$'), 1); - - // the reason we do is because, this may not match the symbol, and we need - // to look for other matching symbols. - for (auto& command_hdr : command_headers) { - if (CompilerKit::STLString(command_hdr.Name).find(symbol_imp) != - CompilerKit::STLString::npos && - CompilerKit::STLString(command_hdr.Name).find(kLinkerDefineSymbol) == - CompilerKit::STLString::npos) { - CompilerKit::STLString undefined_symbol = command_hdr.Name; - auto result_of_sym = undefined_symbol.substr(undefined_symbol.find(symbol_imp)); - - for (int i = 0; result_of_sym[i] != 0; ++i) { - if (result_of_sym[i] != symbol_imp[i]) goto ld_continue_search; - } - - not_found.erase(it); - - if (kVerbose) kConsoleOut << "Found symbol: " << command_hdr.Name << "\n"; - - break; - } - } - - ld_continue_search: - continue; - } - } - - // step 3: check for errors (recheck if we have those symbols.) - - if (!kStartFound && is_executable) { - if (kVerbose) - kConsoleOut << "Undefined entrypoint: " << kLinkerStart - << ", you may have forget to link " - "against your runtime library.\n"; - - kConsoleOut << "Undefined entrypoint " << kLinkerStart << " for executable: " << kOutput - << "\n"; - } - - // step 4: write all PEF commands. 
- - CompilerKit::PEFCommandHeader date_cmd_hdr{}; - - time_t timestamp = time(nullptr); - - CompilerKit::STLString timeStampStr = "__PEFContainer:BuildEpoch:"; - timeStampStr += std::to_string(timestamp); - - strncpy(date_cmd_hdr.Name, timeStampStr.c_str(), timeStampStr.size()); - - date_cmd_hdr.Flags = 0; - date_cmd_hdr.Kind = CompilerKit::kPefZero; - date_cmd_hdr.Offset = output_fc.tellp(); - date_cmd_hdr.VirtualSize = timeStampStr.size(); - - command_headers.push_back(std::move(date_cmd_hdr)); - - CompilerKit::PEFCommandHeader abi_cmd_hdr{}; - - CompilerKit::STLString abi = kLinkerAbiContainer; - - switch (kArch) { - case CompilerKit::kPefArchAMD64: { - abi += "MSFT"; - break; - } - case CompilerKit::kPefArchPowerPC: - case CompilerKit::kPefArchARM64: - case CompilerKit::kPefArchRISCV: - case CompilerKit::kPefArch32000: - case CompilerKit::kPefArch64000: { - abi += "KRNL"; - break; - } - default: { - abi += "NOOP"; - break; - } - } - - std::memcpy(abi_cmd_hdr.Name, abi.c_str(), abi.size()); - - abi_cmd_hdr.VirtualSize = abi.size(); - abi_cmd_hdr.Offset = output_fc.tellp(); - abi_cmd_hdr.Flags = 0; - abi_cmd_hdr.Kind = CompilerKit::kPefLinkerID; - - command_headers.push_back(std::move(abi_cmd_hdr)); - - CompilerKit::PEFCommandHeader stack_cmd_hdr{{}}; - - stack_cmd_hdr.Cpu = kArch; - stack_cmd_hdr.Flags = 0; - stack_cmd_hdr.VirtualSize = sizeof(uintptr_t); - stack_cmd_hdr.Offset = 0; - - std::memcpy(stack_cmd_hdr.Name, kLinkerStackSizeSymbol, strlen(kLinkerStackSizeSymbol)); - - command_headers.push_back(std::move(stack_cmd_hdr)); - - CompilerKit::PEFCommandHeader uuid_cmd_hdr{}; - - std::random_device rd; - - auto seedData = std::array{}; - std::generate(std::begin(seedData), std::end(seedData), std::ref(rd)); - std::seed_seq seq(std::begin(seedData), std::end(seedData)); - std::mt19937 generator(seq); - - auto gen = uuids::uuid_random_generator{generator}; - uuids::uuid id = gen(); - auto uuidStr = uuids::to_string(id); - - std::memcpy(uuid_cmd_hdr.Name, 
kLinkerGuidContainer, strlen(kLinkerGuidContainer)); - std::memcpy(uuid_cmd_hdr.Name + strlen(kLinkerGuidContainer), uuidStr.c_str(), uuidStr.size()); - - uuid_cmd_hdr.VirtualSize = strlen(uuid_cmd_hdr.Name); - uuid_cmd_hdr.Offset = output_fc.tellp(); - uuid_cmd_hdr.Flags = CompilerKit::kPefLinkerID; - uuid_cmd_hdr.Kind = CompilerKit::kPefZero; - - command_headers.push_back(std::move(uuid_cmd_hdr)); - - // prepare a symbol vector. - std::vector undef_symbols; - std::vector dupl_symbols; - std::vector resolve_symbols; - - constexpr Int32 kPaddingOffset = 16; - - size_t previous_offset = - (command_headers.size() * sizeof(CompilerKit::PEFCommandHeader)) + kPaddingOffset; - - CompilerKit::PEFCommandHeader end_exec_hdr; - - end_exec_hdr.Offset = output_fc.tellp(); - end_exec_hdr.Flags = CompilerKit::kPefLinkerID; - end_exec_hdr.Kind = CompilerKit::kPefZero; - - std::memcpy(end_exec_hdr.Name, kEndContainer, strlen(kEndContainer)); - - end_exec_hdr.VirtualSize = strlen(end_exec_hdr.Name); - - command_headers.push_back(std::move(end_exec_hdr)); - - // Finally write down the command headers. - // And check for any duplications - for (size_t commandHeaderIndex = 0UL; commandHeaderIndex < command_headers.size(); - ++commandHeaderIndex) { - if (CompilerKit::STLString(command_headers[commandHeaderIndex].Name) - .find(kLinkerDefineSymbol) != CompilerKit::STLString::npos && - CompilerKit::STLString(command_headers[commandHeaderIndex].Name).find(kLinkerDynamicSym) == - CompilerKit::STLString::npos) { - // ignore :UndefinedSymbol: headers, they do not contain code. 
- continue; - } - - CompilerKit::STLString symbol_name = command_headers[commandHeaderIndex].Name; - - if (!symbol_name.empty()) { - undef_symbols.emplace_back(symbol_name); - } - - command_headers[commandHeaderIndex].Offset += previous_offset; - previous_offset += command_headers[commandHeaderIndex].VirtualSize; - - CompilerKit::STLString name = command_headers[commandHeaderIndex].Name; - - /// so this is valid when we get to the entrypoint. - /// it is always a code64 container. And should equal to kLinkerStart as well. - /// this chunk of code updates the pef_container.Start with the updated offset. - if (name.find(kLinkerStart) != CompilerKit::STLString::npos && - name.find(kPefCode64) != CompilerKit::STLString::npos) { - pef_container.Start = command_headers[commandHeaderIndex].Offset; - auto tellCurPos = output_fc.tellp(); - - output_fc.seekp(0); - output_fc << pef_container; - - output_fc.seekp(tellCurPos); - } - - if (kVerbose) { - kConsoleOut << "Command name: " << name << "\n"; - kConsoleOut << "Virtual address of command content: " - << command_headers[commandHeaderIndex].Offset << "\n"; - } - - output_fc << command_headers[commandHeaderIndex]; - - for (size_t sub_command_header_index = 0UL; sub_command_header_index < command_headers.size(); - ++sub_command_header_index) { - if (sub_command_header_index == commandHeaderIndex) continue; - - if (CompilerKit::STLString(command_headers[sub_command_header_index].Name) - .find(kLinkerDefineSymbol) != CompilerKit::STLString::npos && - CompilerKit::STLString(command_headers[sub_command_header_index].Name) - .find(kLinkerDynamicSym) == CompilerKit::STLString::npos) { - if (kVerbose) { - kConsoleOut << "Ignoring :UndefinedSymbol: headers...\n"; - } - - // Let's ignore :UndefinedSymbol: headers, they do not contain code. 
- continue; - } - - auto& command_hdr = command_headers[sub_command_header_index]; - - if (command_hdr.Name == CompilerKit::STLString(command_headers[commandHeaderIndex].Name)) { - if (std::find(dupl_symbols.cbegin(), dupl_symbols.cend(), command_hdr.Name) == - dupl_symbols.cend()) { - dupl_symbols.emplace_back(command_hdr.Name); - } - - if (kVerbose) kConsoleOut << "Found duplicate symbols of: " << command_hdr.Name << "\n"; - - kDuplicateSymbols = true; - } - } - } - - if (!dupl_symbols.empty()) { - for (auto& symbol : dupl_symbols) { - kConsoleOut << "Multiple symbols of: " << symbol << " detected, cannot continue.\n"; - } - - return NECTAR_EXEC_ERROR; - } - - // step 2.5: write program bytes. - - for (auto& struct_of_blob : kObjectBytes) { - output_fc.write(struct_of_blob.mBlob.data(), struct_of_blob.mBlob.size()); - } - - if (kVerbose) { - kConsoleOut << "Wrote contents of: " << kOutput << "\n"; - } - - // step 3: check if we have those symbols - - std::vector unreferenced_symbols; - - for (auto& command_hdr : command_headers) { - if (auto it = - std::find(not_found.begin(), not_found.end(), CompilerKit::STLString(command_hdr.Name)); - it != not_found.end()) { - unreferenced_symbols.emplace_back(command_hdr.Name); - } - } - - if (!unreferenced_symbols.empty()) { - for (auto& unreferenced_symbol : unreferenced_symbols) { - kConsoleOut << "Undefined symbol " << unreferenced_symbol << "\n"; - } - - return NECTAR_EXEC_ERROR; - } - - if ((!kStartFound || kDuplicateSymbols) && - (std::filesystem::exists(kOutput) || !unreferenced_symbols.empty())) { - if (kVerbose) { - kConsoleOut << "File: " << kOutput << " is corrupt now...\n"; - } - - return NECTAR_EXEC_ERROR; - } - - return NECTAR_SUCCESS; -} - -// Last rev 2026 diff --git a/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cpp b/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cpp new file mode 100644 index 0000000..6be6c58 --- /dev/null +++ b/src/CompilerKit/src/Linkers/DynamicLinker64+PEF.cpp @@ -0,0 +1,677 @@ +// 
Copyright 2024-2026, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +/// @author Amlal El Mahrouss (amlal@nekernel.org) +/// @brief NeKernel.org 64-bit PEF Linker. +/// Last Rev: Sun Feb 8 CET 2026 +/// @note Do not look up for anything with .code64/.data64/.zero64! +/// It will be loaded when the program loader will start the image. + +#include +#include +#include +#include +#include +#include +#include + +#define kLinkerPefNoCpu (0U) +#define kLinkerPefNoSubCpu (0U) +#define kLinkerPefDefaultOutput {"a" kPefExt} + +#define kLinkerVersionStr "Nectar 64-Bit Linker (NeKernel PEF)" + +#define kLinkerDefaultOrigin kPefBaseOrigin + +#define kLinkerId (0x5046FF) + +#define kLinkerAbiContainer "__PEFContainer:ABI:" +#define kLinkerGuidContainer "__PEFContainer:GUID:" +#define kEndContainer "__PEFContainer:END:" + +#define kLinkerSplash() kStdOut << kLinkerVersionStr << kStdEndl + +/// @brief PEF stack size symbol. +#define kLinkerStackSizeSymbol "__PEFSizeOfReserveStack" + +#define kConsoleOut \ + (std::cout << "\e[0;31m" \ + << "ld64: " \ + << "\e[0;97m") + +enum struct ABIType : Int32 { + kABITypeNull = 0, + kABITypeStart = 0x1010, /* The start of ABI list. */ + kABITypeNE = 0x5046, /* PF (NeKernel.org's PEF ABI) */ + kABITypeEnd = kABITypeNull, + kABITypeInvalid = 0xFFFF, +}; + +static CompilerKit::STLString kOutput = kLinkerPefDefaultOutput; +static ABIType kAbi = ABIType::kABITypeNE; +static Int32 kSubArch = kLinkerPefNoSubCpu; +static Int32 kArch = CompilerKit::kPefArchInvalid; +static bool kFatBinaryEnable = false; +static bool kStartFound = false; +static bool kDuplicateSymbols = false; + +/* ld64 is to be found, mld is to be found at runtime. 
*/ +static const Char* kLinkerDefineSymbol = ":UndefinedSymbol:"; +static const Char* kLinkerDynamicSym = ":RuntimeSymbol:"; + +static CompilerKit::STLString kLinkerStart = kPefStart; + +/* object code and list. */ +static std::vector kObjectList; +static std::vector kObjectBytes; + +/// @brief Nectar 64-bit Linker. +/// @note This linker is made for PEF executable, thus Nectar based OSes. +NECTAR_MODULE(DynamicLinker64PEF) { + bool is_executable = true; + + CompilerKit::install_signal(SIGSEGV, CompilerKit::Detail::drvi_crash_handler); + + /** + * @brief parse flags and trigger options. + */ + for (size_t linker_arg = 1; linker_arg < argc; ++linker_arg) { + if (std::strcmp(argv[linker_arg], "-help") == 0) { + kLinkerSplash(); + + kConsoleOut << "-version: Show linker version.\n"; + kConsoleOut << "-help: Show linker help.\n"; + kConsoleOut << "-verbose: Enable linker trace.\n"; + kConsoleOut << "-fdylib: Output as a Dynamic PEF.\n"; + kConsoleOut << "-ffat: Output as a FAT PEF.\n"; + kConsoleOut << "-f32k: Output as a 32x0 PEF.\n"; + kConsoleOut << "-f64k: Output as a 64x0 PEF.\n"; + kConsoleOut << "-famd64: Output as a AMD64 PEF.\n"; + kConsoleOut << "-frv64: Output as a RISC-V PEF.\n"; + kConsoleOut << "-fpower64: Output as a POWER PEF.\n"; + kConsoleOut << "-farm64: Output as a ARM64 PEF.\n"; + kConsoleOut << "-output: Select the output file name.\n"; + + return NECTAR_SUCCESS; + } else if (std::strcmp(argv[linker_arg], "-version") == 0) { + kLinkerSplash(); + + return NECTAR_SUCCESS; + } else if (std::strcmp(argv[linker_arg], "-ffat") == 0) { + kFatBinaryEnable = true; + + continue; + } else if (std::strcmp(argv[linker_arg], "-f64k") == 0) { + kArch = CompilerKit::kPefArch64000; + + continue; + } else if (std::strcmp(argv[linker_arg], "-famd64") == 0) { + kArch = CompilerKit::kPefArchAMD64; + + continue; + } else if (std::strcmp(argv[linker_arg], "-fstart") == 0) { + if (argv[linker_arg + 1] == nullptr || argv[linker_arg + 1][0] == '-') continue; + + 
kLinkerStart = argv[linker_arg + 1]; + linker_arg += 1; + + continue; + } else if (std::strcmp(argv[linker_arg], "-f32k") == 0) { + kArch = CompilerKit::kPefArch32000; + + continue; + } else if (std::strcmp(argv[linker_arg], "-fpower64") == 0) { + kArch = CompilerKit::kPefArchPowerPC; + + continue; + } else if (std::strcmp(argv[linker_arg], "-friscv64") == 0) { + kArch = CompilerKit::kPefArchRISCV; + + continue; + } else if (std::strcmp(argv[linker_arg], "-farm64") == 0) { + kArch = CompilerKit::kPefArchARM64; + + continue; + } else if (std::strcmp(argv[linker_arg], "-verbose") == 0) { + kVerbose = true; + + continue; + } else if (std::strcmp(argv[linker_arg], "-fdylib") == 0) { + if (kOutput.empty()) { + continue; + } + + if (kOutput.find(kPefExt) != CompilerKit::STLString::npos) + kOutput.erase(kOutput.find(kPefExt), strlen(kPefExt)); + + kOutput += kPefDylibExt; + + is_executable = false; + + continue; + } else if (std::strcmp(argv[linker_arg], "-output") == 0) { + if ((linker_arg + 1) > argc) continue; + + kOutput = argv[linker_arg + 1]; + ++linker_arg; + + continue; + } else { + if (argv[linker_arg][0] == '-') { + kConsoleOut << "unknown option: " << argv[linker_arg] << "\n"; + return EXIT_FAILURE; + } + + kObjectList.emplace_back(argv[linker_arg]); + + continue; + } + } + + if (kOutput.empty()) { + kConsoleOut << "no output filename set." << std::endl; + return NECTAR_EXEC_ERROR; + } else if (kObjectList.empty()) { + kConsoleOut << "no input files." << std::endl; + return NECTAR_EXEC_ERROR; + } else { + namespace FS = std::filesystem; + + // check for existing files, if they don't throw an error. + for (auto& obj : kObjectList) { + if (!FS::exists(obj)) { + // if filesystem doesn't find file + // -> throw error. + kConsoleOut << "no such file: " << obj << std::endl; + return NECTAR_EXEC_ERROR; + } + } + } + + // PEF expects a valid target architecture when outputing a binary. 
+ if (kArch == CompilerKit::kPefArchInvalid) { + kConsoleOut << "no target architecture set, can't continue." << std::endl; + return NECTAR_EXEC_ERROR; + } + + CompilerKit::PEFContainer pef_container{}; + + int32_t archs = kArch; + + pef_container.Count = 0UL; + pef_container.Kind = is_executable ? CompilerKit::kPefKindExec : CompilerKit::kPefKindDylib; + pef_container.SubCpu = kSubArch; + pef_container.Linker = kLinkerId; // Nectar Linker + pef_container.Abi = static_cast(kAbi); // Multi-Processor UX ABI + pef_container.Magic[0] = kPefMagic[kFatBinaryEnable ? 2 : 0]; + pef_container.Magic[1] = kPefMagic[1]; + pef_container.Magic[2] = kPefMagic[kFatBinaryEnable ? 0 : 2]; + pef_container.Magic[3] = kPefMagic[3]; + pef_container.Version = kPefVersion; + + // specify the start address, can be 0x10000 + pef_container.Start = kLinkerDefaultOrigin; + pef_container.HdrSz = sizeof(CompilerKit::PEFContainer); + pef_container.Checksum = 0UL; + + std::ofstream output_fc(kOutput, std::ofstream::binary); + + if (output_fc.bad()) { + if (kVerbose) { + kConsoleOut << "error: " << strerror(errno) << "\n"; + } + + return NECTAR_FILE_NOT_FOUND; + } + + //! Read AE to convert as PEF. + + std::vector command_headers; + CompilerKit::Utils::AEReadableProtocol reader_protocol{}; + + for (const auto& objectFile : kObjectList) { + if (!std::filesystem::exists(objectFile)) continue; + + CompilerKit::AEHeader hdr{}; + + reader_protocol.fFilePtr = std::ifstream(objectFile, std::ifstream::binary); + reader_protocol.fFilePtr >> hdr; + + if (hdr.fMagic[0] == kAEMag0 && hdr.fMagic[1] == kAEMag1 && + hdr.fSize == sizeof(CompilerKit::AEHeader) && hdr.fMagic[2] == kAEMag2) { + if (hdr.fArch != kArch && hdr.fVersion == kAEIdentVersion) { + if (kVerbose) kConsoleOut << "is this a FAT binary? 
: "; + + if (!kFatBinaryEnable) { + if (kVerbose) kConsoleOut << "not a FAT binary.\n"; + + kConsoleOut << "object " << objectFile + << " is a different kind of architecture and output isn't " + "treated as a FAT binary." + << std::endl; + + return NECTAR_FAT_ERROR; + } else { + if (kVerbose) { + kConsoleOut << "Architecture matches what we expect.\n"; + } + } + } + + // append arch type to archs varaible. + archs |= hdr.fArch; + std::size_t cnt = hdr.fCount; + + if (kVerbose) kConsoleOut << "header found, record count: " << cnt << "\n"; + + pef_container.Count = cnt; + + Char* raw_ae_records = new Char[cnt * sizeof(CompilerKit::AERecordHeader)]; + + if (!raw_ae_records) { + if (kVerbose) kConsoleOut << "allocation failed for records of count: " << cnt << "\n"; + } + + std::memset(raw_ae_records, 0, cnt * sizeof(CompilerKit::AERecordHeader)); + + auto* ae_records = reader_protocol.Read(raw_ae_records, cnt); + + size_t org = kLinkerDefaultOrigin; + + for (size_t ae_record_index = 0; ae_record_index < cnt; ++ae_record_index) { + CompilerKit::PEFCommandHeader command_header{{}}; + std::size_t offset_of_obj = ae_records[ae_record_index].fOffset; + + std::memcpy(command_header.Name, ae_records[ae_record_index].fName, kPefNameLen); + + CompilerKit::STLString cmd_hdr_name(command_header.Name); + + // check this header if it's any valid. 
+ if (cmd_hdr_name.find(kPefCode64) == CompilerKit::STLString::npos && + cmd_hdr_name.find(kPefData64) == CompilerKit::STLString::npos && + cmd_hdr_name.find(kPefZero64) == CompilerKit::STLString::npos) { + if (cmd_hdr_name.find(kLinkerStart) == CompilerKit::STLString::npos && + *command_header.Name == 0) { + if (cmd_hdr_name.find(kLinkerDefineSymbol) != CompilerKit::STLString::npos) { + goto ld_mark_header; + } else { + continue; + } + } + } + + if (cmd_hdr_name.find(kLinkerStart) != CompilerKit::STLString::npos && + cmd_hdr_name.find(kPefCode64) != CompilerKit::STLString::npos) { + kStartFound = true; + } + + ld_mark_header: + command_header.Offset = offset_of_obj; + command_header.Kind = ae_records[ae_record_index].fKind; + command_header.VirtualSize = ae_records[ae_record_index].fSize; + command_header.Cpu = hdr.fArch; + command_header.VirtualAddress = org; + command_header.SubCpu = hdr.fSubArch; + command_header.OffsetSize = ae_records[ae_record_index].fSize; + + org += command_header.VirtualSize; + + if (kVerbose) { + kConsoleOut << "Record: " << ae_records[ae_record_index].fName << " is marked.\n"; + kConsoleOut << "Offset: " << command_header.Offset << "\n"; + } + + command_headers.emplace_back(command_header); + } + + delete[] raw_ae_records; + raw_ae_records = nullptr; + + std::vector bytes; + bytes.resize(hdr.fCodeSize); + + reader_protocol.fFilePtr.seekg(std::streamsize(hdr.fStartCode)); + reader_protocol.fFilePtr.read(bytes.data(), std::streamsize(hdr.fCodeSize)); + + kObjectBytes.push_back({.mBlob = bytes, .mOffset = hdr.fStartCode}); + + // Blob was written, close fp. + + reader_protocol.fFilePtr.close(); + + continue; + } + + kConsoleOut << "not an object container: " << objectFile << std::endl; + + // don't continue, it is a fatal error. 
+ return NECTAR_EXEC_ERROR; + } + + pef_container.Cpu = archs; + + output_fc << pef_container; + + if (kVerbose) { + kConsoleOut << "wrote container to: " << output_fc.tellp() << ".\n"; + } + + output_fc.seekp(std::streamsize(pef_container.HdrSz)); + + std::vector not_found; + std::vector symbols; + + // step 2: check for errors (multiple symbols, undefined ones) + + CompilerKit::STLString cmd_hdr_name; + + for (auto& command_hdr : command_headers) { + // check if this symbol needs to be resolved. + if (CompilerKit::STLString(command_hdr.Name).find(kLinkerDefineSymbol) != + CompilerKit::STLString::npos && + CompilerKit::STLString(command_hdr.Name).find(kLinkerDynamicSym) == + CompilerKit::STLString::npos) { + if (kVerbose) kConsoleOut << "Found undefined symbol: " << command_hdr.Name << "\n"; + + cmd_hdr_name = command_hdr.Name; + + if (auto it = std::find(not_found.begin(), not_found.end(), cmd_hdr_name); + it == not_found.end()) { + not_found.emplace_back(cmd_hdr_name); + } + } + + symbols.emplace_back(command_hdr.Name); + } + + // Now try to solve these symbols. + + for (size_t not_found_idx = 0; not_found_idx < command_headers.size(); ++not_found_idx) { + if (const auto it = std::find(not_found.begin(), not_found.end(), + CompilerKit::STLString(command_headers[not_found_idx].Name)); + it != not_found.end()) { + CompilerKit::STLString symbol_imp = *it; + + if (symbol_imp.find(kLinkerDefineSymbol) == CompilerKit::STLString::npos) continue; + + // erase the lookup prefix. + symbol_imp.erase(0, symbol_imp.find(kLinkerDefineSymbol) + strlen(kLinkerDefineSymbol)); + + // demangle everything. + while (symbol_imp.find('$') != CompilerKit::STLString::npos) + symbol_imp.erase(symbol_imp.find('$'), 1); + + // the reason we do is because, this may not match the symbol, and we need + // to look for other matching symbols. 
+ for (auto& command_hdr : command_headers) { + if (CompilerKit::STLString(command_hdr.Name).find(symbol_imp) != + CompilerKit::STLString::npos && + CompilerKit::STLString(command_hdr.Name).find(kLinkerDefineSymbol) == + CompilerKit::STLString::npos) { + CompilerKit::STLString undefined_symbol = command_hdr.Name; + auto result_of_sym = undefined_symbol.substr(undefined_symbol.find(symbol_imp)); + + for (int i = 0; result_of_sym[i] != 0; ++i) { + if (result_of_sym[i] != symbol_imp[i]) goto ld_continue_search; + } + + not_found.erase(it); + + if (kVerbose) kConsoleOut << "Found symbol: " << command_hdr.Name << "\n"; + + break; + } + } + + ld_continue_search: + continue; + } + } + + // step 3: check for errors (recheck if we have those symbols.) + + if (!kStartFound && is_executable) { + if (kVerbose) + kConsoleOut << "Undefined entrypoint: " << kLinkerStart + << ", you may have forget to link " + "against your runtime library.\n"; + + kConsoleOut << "Undefined entrypoint " << kLinkerStart << " for executable: " << kOutput + << "\n"; + } + + // step 4: write all PEF commands. 
+ + CompilerKit::PEFCommandHeader date_cmd_hdr{}; + + time_t timestamp = time(nullptr); + + CompilerKit::STLString timeStampStr = "__PEFContainer:BuildEpoch:"; + timeStampStr += std::to_string(timestamp); + + strncpy(date_cmd_hdr.Name, timeStampStr.c_str(), timeStampStr.size()); + + date_cmd_hdr.Flags = 0; + date_cmd_hdr.Kind = CompilerKit::kPefZero; + date_cmd_hdr.Offset = output_fc.tellp(); + date_cmd_hdr.VirtualSize = timeStampStr.size(); + + command_headers.push_back(std::move(date_cmd_hdr)); + + CompilerKit::PEFCommandHeader abi_cmd_hdr{}; + + CompilerKit::STLString abi = kLinkerAbiContainer; + + switch (kArch) { + case CompilerKit::kPefArchAMD64: { + abi += "MSFT"; + break; + } + case CompilerKit::kPefArchPowerPC: + case CompilerKit::kPefArchARM64: + case CompilerKit::kPefArchRISCV: + case CompilerKit::kPefArch32000: + case CompilerKit::kPefArch64000: { + abi += "KRNL"; + break; + } + default: { + abi += "NOOP"; + break; + } + } + + std::memcpy(abi_cmd_hdr.Name, abi.c_str(), abi.size()); + + abi_cmd_hdr.VirtualSize = abi.size(); + abi_cmd_hdr.Offset = output_fc.tellp(); + abi_cmd_hdr.Flags = 0; + abi_cmd_hdr.Kind = CompilerKit::kPefLinkerID; + + command_headers.push_back(std::move(abi_cmd_hdr)); + + CompilerKit::PEFCommandHeader stack_cmd_hdr{{}}; + + stack_cmd_hdr.Cpu = kArch; + stack_cmd_hdr.Flags = 0; + stack_cmd_hdr.VirtualSize = sizeof(uintptr_t); + stack_cmd_hdr.Offset = 0; + + std::memcpy(stack_cmd_hdr.Name, kLinkerStackSizeSymbol, strlen(kLinkerStackSizeSymbol)); + + command_headers.push_back(std::move(stack_cmd_hdr)); + + CompilerKit::PEFCommandHeader uuid_cmd_hdr{}; + + std::random_device rd; + + auto seedData = std::array{}; + std::generate(std::begin(seedData), std::end(seedData), std::ref(rd)); + std::seed_seq seq(std::begin(seedData), std::end(seedData)); + std::mt19937 generator(seq); + + auto gen = uuids::uuid_random_generator{generator}; + uuids::uuid id = gen(); + auto uuidStr = uuids::to_string(id); + + std::memcpy(uuid_cmd_hdr.Name, 
kLinkerGuidContainer, strlen(kLinkerGuidContainer)); + std::memcpy(uuid_cmd_hdr.Name + strlen(kLinkerGuidContainer), uuidStr.c_str(), uuidStr.size()); + + uuid_cmd_hdr.VirtualSize = strlen(uuid_cmd_hdr.Name); + uuid_cmd_hdr.Offset = output_fc.tellp(); + uuid_cmd_hdr.Flags = CompilerKit::kPefLinkerID; + uuid_cmd_hdr.Kind = CompilerKit::kPefZero; + + command_headers.push_back(std::move(uuid_cmd_hdr)); + + // prepare a symbol vector. + std::vector undef_symbols; + std::vector dupl_symbols; + std::vector resolve_symbols; + + constexpr Int32 kPaddingOffset = 16; + + size_t previous_offset = + (command_headers.size() * sizeof(CompilerKit::PEFCommandHeader)) + kPaddingOffset; + + CompilerKit::PEFCommandHeader end_exec_hdr; + + end_exec_hdr.Offset = output_fc.tellp(); + end_exec_hdr.Flags = CompilerKit::kPefLinkerID; + end_exec_hdr.Kind = CompilerKit::kPefZero; + + std::memcpy(end_exec_hdr.Name, kEndContainer, strlen(kEndContainer)); + + end_exec_hdr.VirtualSize = strlen(end_exec_hdr.Name); + + command_headers.push_back(std::move(end_exec_hdr)); + + // Finally write down the command headers. + // And check for any duplications + for (size_t commandHeaderIndex = 0UL; commandHeaderIndex < command_headers.size(); + ++commandHeaderIndex) { + if (CompilerKit::STLString(command_headers[commandHeaderIndex].Name) + .find(kLinkerDefineSymbol) != CompilerKit::STLString::npos && + CompilerKit::STLString(command_headers[commandHeaderIndex].Name).find(kLinkerDynamicSym) == + CompilerKit::STLString::npos) { + // ignore :UndefinedSymbol: headers, they do not contain code. 
+ continue; + } + + CompilerKit::STLString symbol_name = command_headers[commandHeaderIndex].Name; + + if (!symbol_name.empty()) { + undef_symbols.emplace_back(symbol_name); + } + + command_headers[commandHeaderIndex].Offset += previous_offset; + previous_offset += command_headers[commandHeaderIndex].VirtualSize; + + CompilerKit::STLString name = command_headers[commandHeaderIndex].Name; + + /// so this is valid when we get to the entrypoint. + /// it is always a code64 container. And should equal to kLinkerStart as well. + /// this chunk of code updates the pef_container.Start with the updated offset. + if (name.find(kLinkerStart) != CompilerKit::STLString::npos && + name.find(kPefCode64) != CompilerKit::STLString::npos) { + pef_container.Start = command_headers[commandHeaderIndex].Offset; + auto tellCurPos = output_fc.tellp(); + + output_fc.seekp(0); + output_fc << pef_container; + + output_fc.seekp(tellCurPos); + } + + if (kVerbose) { + kConsoleOut << "Command name: " << name << "\n"; + kConsoleOut << "Virtual address of command content: " + << command_headers[commandHeaderIndex].Offset << "\n"; + } + + output_fc << command_headers[commandHeaderIndex]; + + for (size_t sub_command_header_index = 0UL; sub_command_header_index < command_headers.size(); + ++sub_command_header_index) { + if (sub_command_header_index == commandHeaderIndex) continue; + + if (CompilerKit::STLString(command_headers[sub_command_header_index].Name) + .find(kLinkerDefineSymbol) != CompilerKit::STLString::npos && + CompilerKit::STLString(command_headers[sub_command_header_index].Name) + .find(kLinkerDynamicSym) == CompilerKit::STLString::npos) { + if (kVerbose) { + kConsoleOut << "Ignoring :UndefinedSymbol: headers...\n"; + } + + // Let's ignore :UndefinedSymbol: headers, they do not contain code. 
+ continue; + } + + auto& command_hdr = command_headers[sub_command_header_index]; + + if (command_hdr.Name == CompilerKit::STLString(command_headers[commandHeaderIndex].Name)) { + if (std::find(dupl_symbols.cbegin(), dupl_symbols.cend(), command_hdr.Name) == + dupl_symbols.cend()) { + dupl_symbols.emplace_back(command_hdr.Name); + } + + if (kVerbose) kConsoleOut << "Found duplicate symbols of: " << command_hdr.Name << "\n"; + + kDuplicateSymbols = true; + } + } + } + + if (!dupl_symbols.empty()) { + for (auto& symbol : dupl_symbols) { + kConsoleOut << "Multiple symbols of: " << symbol << " detected, cannot continue.\n"; + } + + return NECTAR_EXEC_ERROR; + } + + // step 2.5: write program bytes. + + for (auto& struct_of_blob : kObjectBytes) { + output_fc.write(struct_of_blob.mBlob.data(), struct_of_blob.mBlob.size()); + } + + if (kVerbose) { + kConsoleOut << "Wrote contents of: " << kOutput << "\n"; + } + + // step 3: check if we have those symbols + + std::vector unreferenced_symbols; + + for (auto& command_hdr : command_headers) { + if (auto it = + std::find(not_found.begin(), not_found.end(), CompilerKit::STLString(command_hdr.Name)); + it != not_found.end()) { + unreferenced_symbols.emplace_back(command_hdr.Name); + } + } + + if (!unreferenced_symbols.empty()) { + for (auto& unreferenced_symbol : unreferenced_symbols) { + kConsoleOut << "Undefined symbol " << unreferenced_symbol << "\n"; + } + + return NECTAR_EXEC_ERROR; + } + + if ((!kStartFound || kDuplicateSymbols) && + (std::filesystem::exists(kOutput) || !unreferenced_symbols.empty())) { + if (kVerbose) { + kConsoleOut << "File: " << kOutput << " is corrupt now...\n"; + } + + return NECTAR_EXEC_ERROR; + } + + return NECTAR_SUCCESS; +} + +// Last rev 2026 diff --git a/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cc b/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cc deleted file mode 100644 index c2c17a8..0000000 --- a/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cc +++ /dev/null 
@@ -1,917 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -/// BUGS: 0 - -#include -#include -#include -#include -#include -#include -#include -#include - -#define kMacroPrefix '#' - -/// @author Amlal El Mahrouss (amlal@nekernel.org) -/// @file Preprocessor+Generic.cc -/// @brief Nectar Preprocessor. - -typedef Int32 (*pp_parser_fn_t)(CompilerKit::STLString& line, std::ifstream& hdr_file, - std::ofstream& pp_out); - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief Preprocessor internal types. - -///////////////////////////////////////////////////////////////////////////////////////// - -namespace Detail { -enum PPOperatorType : Int32 { - kInvalid = 0, - kEqual = 100, - kGreaterEqThan, - kLesserEqThan, - kGreaterThan, - kLesserThan, - kNotEqual, - kCount = 6, -}; - -struct pp_macro_condition final { - int32_t fType; - CompilerKit::STLString fTypeName; - - void Print() { - std::cout << "type: " << fType << "\n"; - std::cout << "type_name: " << fTypeName << "\n"; - } -}; - -struct pp_macro final { - std::vector fArgs; - CompilerKit::STLString fName; - CompilerKit::STLString fValue; - - void Print() { - std::cout << "name: " << fName << "\n"; - std::cout << "value: " << fValue << "\n"; - - for (auto& arg : fArgs) { - std::cout << "arg: " << arg << "\n"; - } - } -}; -} // namespace Detail - -static std::vector kFiles; -static std::vector kMacros; -static std::vector kIncludes; - -static CompilerKit::STLString kWorkingDir = ""; - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name pp_parse_if_condition -// @brief parse #if condition - -///////////////////////////////////////////////////////////////////////////////////////// - -int32_t 
pp_parse_if_condition(Detail::pp_macro_condition& cond, Detail::pp_macro& macro, - bool& inactive_code, bool& defined, - CompilerKit::STLString& macro_str) { - if (cond.fType == Detail::PPOperatorType::kEqual) { - auto pos = macro_str.find(macro.fName); - if (pos == CompilerKit::STLString::npos) return 0; - - auto substr_macro = macro_str.substr(pos + macro.fName.size()); - - if (substr_macro.find(macro.fValue) != CompilerKit::STLString::npos) { - if (macro.fValue == "0") { - defined = false; - inactive_code = true; - - return 1; - } - - defined = true; - inactive_code = false; - - return 1; - } - } else if (cond.fType == Detail::kNotEqual) { - auto pos = macro_str.find(macro.fName); - if (pos == CompilerKit::STLString::npos) return 0; - - auto substr_macro = macro_str.substr(pos + macro.fName.size()); - - if (substr_macro.find(macro.fName) != CompilerKit::STLString::npos) { - if (substr_macro.find(macro.fValue) != CompilerKit::STLString::npos) { - defined = false; - inactive_code = true; - - return 1; - } - - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - auto pos = macro_str.find(macro.fName); - if (pos == CompilerKit::STLString::npos) return 0; - - auto substr_macro = macro_str.substr(pos + macro.fName.size()); - - CompilerKit::STLString number; - - for (auto& macro_num : kMacros) { - if (substr_macro.find(macro_num.fName) != CompilerKit::STLString::npos) { - for (size_t i = 0; i < macro_num.fValue.size(); ++i) { - if (isdigit(macro_num.fValue[i])) { - number += macro_num.fValue[i]; - } else { - number.clear(); - break; - } - } - - break; - } - } - - size_t y = 2; - - /* last try */ - for (; y < macro_str.size(); y++) { - if (isdigit(macro_str[y])) { - for (size_t x = y; x < macro_str.size(); x++) { - if (macro_str[x] == ' ') break; - - number += macro_str[x]; - } - - break; - } - } - - size_t rhs = atol(macro.fValue.c_str()); - size_t lhs = atol(number.c_str()); - - if (lhs == 0) { - number.clear(); - ++y; - - for (; y < 
macro_str.size(); y++) { - if (isdigit(macro_str[y])) { - for (size_t x = y; x < macro_str.size(); x++) { - if (macro_str[x] == ' ') break; - - number += macro_str[x]; - } - - break; - } - } - - lhs = atol(number.c_str()); - } - - if (cond.fType == Detail::PPOperatorType::kGreaterThan) { - if (lhs > rhs) { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == Detail::PPOperatorType::kGreaterEqThan) { - if (lhs >= rhs) { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == Detail::PPOperatorType::kLesserEqThan) { - if (lhs <= rhs) { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - if (cond.fType == Detail::PPOperatorType::kLesserThan) { - if (lhs < rhs) { - defined = true; - inactive_code = false; - - return 1; - } - - return 0; - } - - return 0; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief stores every included file here. - -///////////////////////////////////////////////////////////////////////////////////////// - -std::vector kAllIncludes; - -///////////////////////////////////////////////////////////////////////////////////////// - -// @name pp_parse_file -// @brief parse file to preprocess it. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -void pp_parse_file(std::ifstream& hdr_file, std::ofstream& pp_out) { - CompilerKit::STLString hdr_line; - CompilerKit::STLString line_after_include; - - bool inactive_code = false; - bool defined = false; - - try { - while (std::getline(hdr_file, hdr_line)) { - if (inactive_code) { - if (hdr_line.find("#endif") == CompilerKit::STLString::npos) { - continue; - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("#endif") != CompilerKit::STLString::npos) { - inactive_code = false; - } - } - - if (hdr_line.find("*/") != CompilerKit::STLString::npos) { - hdr_line.erase(hdr_line.find("*/"), strlen("*/")); - } - - if (hdr_line.find("/*") != CompilerKit::STLString::npos) { - inactive_code = true; - - // get rid of comment. - hdr_line.erase(hdr_line.find("/*")); - } - - if (hdr_line[0] == kMacroPrefix && hdr_line.find("endif") != CompilerKit::STLString::npos) { - if (!defined && inactive_code) { - inactive_code = false; - defined = false; - - continue; - } - - continue; - } - - if (!defined && inactive_code) { - continue; - } - - if (defined && inactive_code) { - continue; - } - - for (auto macro : kMacros) { - if (CompilerKit::ast_find_needle(hdr_line, macro.fName)) { - auto macro_pos = hdr_line.find(macro.fName); - if (macro_pos == CompilerKit::STLString::npos) continue; - - if (hdr_line.substr(macro_pos).find(macro.fName + '(') != CompilerKit::STLString::npos) { - if (!macro.fArgs.empty()) { - CompilerKit::STLString symbol_val = macro.fValue; - std::vector args; - - size_t x_arg_indx = 0; - - CompilerKit::STLString line_after_define = hdr_line; - CompilerKit::STLString str_arg; - - if (line_after_define.find("(") != CompilerKit::STLString::npos) { - line_after_define.erase(0, line_after_define.find("(") + 1); - - for (auto& subc : line_after_define) { - if (subc == ' ' || subc == '\t') continue; - - if (subc == ',' || subc == ')') { - if (str_arg.empty()) continue; - - 
args.push_back(str_arg); - - str_arg.clear(); - - continue; - } - - str_arg.push_back(subc); - } - } - - for (auto arg : macro.fArgs) { - if (symbol_val.find(macro.fArgs[x_arg_indx]) != CompilerKit::STLString::npos) { - symbol_val.replace(symbol_val.find(macro.fArgs[x_arg_indx]), - macro.fArgs[x_arg_indx].size(), args[x_arg_indx]); - ++x_arg_indx; - } else { - throw std::runtime_error("cppdrv: Internal error."); - } - } - - auto len = macro.fName.size(); - len += symbol_val.size(); - len += 2; // ( and ) - - auto paren_pos = hdr_line.find(")"); - if (paren_pos != CompilerKit::STLString::npos) { - hdr_line.erase(paren_pos, 1); - } - - auto macro_call_pos = hdr_line.find(macro.fName + '('); - if (macro_call_pos != CompilerKit::STLString::npos) { - hdr_line.replace(macro_call_pos, len, symbol_val); - } - } else { - auto value = macro.fValue; - - hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), value); - } - } - } - } - - if (hdr_line[0] == kMacroPrefix && hdr_line.find("define ") != CompilerKit::STLString::npos) { - auto define_pos = hdr_line.find("define "); - if (define_pos == CompilerKit::STLString::npos) continue; - - auto line_after_define = hdr_line.substr(define_pos + strlen("define ")); - - CompilerKit::STLString macro_value; - CompilerKit::STLString macro_key; - - std::size_t pos = 0UL; - - std::vector args; - bool on_args = false; - - for (auto& ch : line_after_define) { - ++pos; - - if (ch == '(') { - on_args = true; - continue; - } - - if (ch == ')') { - on_args = false; - continue; - } - - if (ch == '\\') continue; - - if (on_args) continue; - - if (ch == ' ') { - for (size_t i = pos; i < line_after_define.size(); i++) { - macro_value += line_after_define[i]; - } - - break; - } - - macro_key += ch; - } - - CompilerKit::STLString str; - - if (line_after_define.find("(") != CompilerKit::STLString::npos) { - line_after_define.erase(0, line_after_define.find("(") + 1); - - for (auto& subc : line_after_define) { - if (subc == ',' || subc == 
')') { - if (str.empty()) continue; - - args.push_back(str); - - str.clear(); - - continue; - } - - str.push_back(subc); - } - } - - Detail::pp_macro macro; - - macro.fArgs = args; - macro.fName = macro_key; - macro.fValue = macro_value; - - kMacros.emplace_back(macro); - - continue; - } - - if (hdr_line[0] != kMacroPrefix) { - if (inactive_code) { - continue; - } - - pp_out << hdr_line << std::endl; - - continue; - } - - if (hdr_line[0] == kMacroPrefix && hdr_line.find("ifndef") != CompilerKit::STLString::npos) { - auto ifndef_pos = hdr_line.find("ifndef"); - if (ifndef_pos == CompilerKit::STLString::npos) continue; - - auto line_after_ifndef = hdr_line.substr(ifndef_pos + strlen("ifndef") + 1); - CompilerKit::STLString macro; - - for (auto& ch : line_after_ifndef) { - if (ch == ' ') { - break; - } - - macro += ch; - } - - if (macro == "0") { - defined = true; - inactive_code = false; - continue; - } - - if (macro == "1") { - defined = false; - inactive_code = true; - - continue; - } - - bool found = false; - - defined = true; - inactive_code = false; - - for (auto& macro_ref : kMacros) { - if (hdr_line.find(macro_ref.fName) != CompilerKit::STLString::npos) { - found = true; - break; - } - } - - if (found) { - defined = false; - inactive_code = true; - - continue; - } - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("else") != CompilerKit::STLString::npos) { - if (!defined && inactive_code) { - inactive_code = false; - defined = true; - - continue; - } else { - defined = false; - inactive_code = true; - - continue; - } - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("ifdef") != CompilerKit::STLString::npos) { - auto ifdef_pos = hdr_line.find("ifdef"); - if (ifdef_pos == CompilerKit::STLString::npos) continue; - - auto line_after_ifdef = hdr_line.substr(ifdef_pos + strlen("ifdef") + 1); - CompilerKit::STLString macro; - - for (auto& ch : line_after_ifdef) { - if (ch == ' ') { - break; - } - - macro += ch; - } - - if (macro == "0") { - defined 
= false; - inactive_code = true; - - continue; - } - - if (macro == "1") { - defined = true; - inactive_code = false; - - continue; - } - - defined = false; - inactive_code = true; - - for (auto& macro_ref : kMacros) { - if (hdr_line.find(macro_ref.fName) != CompilerKit::STLString::npos) { - defined = true; - inactive_code = false; - - break; - } - } - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("if") != CompilerKit::STLString::npos) { - inactive_code = true; - - std::vector pp_macro_condition_list = { - { - .fType = Detail::PPOperatorType::kEqual, - .fTypeName = "==", - }, - { - .fType = Detail::PPOperatorType::kNotEqual, - .fTypeName = "!=", - }, - { - .fType = Detail::PPOperatorType::kLesserThan, - .fTypeName = "<", - }, - { - .fType = Detail::PPOperatorType::kGreaterThan, - .fTypeName = ">", - }, - { - .fType = Detail::PPOperatorType::kLesserEqThan, - .fTypeName = "<=", - }, - { - .fType = Detail::PPOperatorType::kGreaterEqThan, - .fTypeName = ">=", - }, - }; - - int32_t good_to_go = 0; - - for (auto& macro_condition : pp_macro_condition_list) { - if (hdr_line.find(macro_condition.fTypeName) != CompilerKit::STLString::npos) { - for (auto& found_macro : kMacros) { - if (hdr_line.find(found_macro.fName) != CompilerKit::STLString::npos) { - good_to_go = pp_parse_if_condition(macro_condition, found_macro, inactive_code, - defined, hdr_line); - - break; - } - } - } - } - - if (good_to_go) continue; - - auto if_pos = hdr_line.find("if"); - if (if_pos == CompilerKit::STLString::npos) continue; - - auto line_after_if = hdr_line.substr(if_pos + strlen("if") + 1); - CompilerKit::STLString macro; - - for (auto& ch : line_after_if) { - if (ch == ' ') { - break; - } - - macro += ch; - } - - if (macro == "0") { - defined = false; - inactive_code = true; - continue; - } - - if (macro == "1") { - defined = true; - inactive_code = false; - - continue; - } - - // last try, is it defined to be one? 
- for (auto& macro_ref : kMacros) { - if (macro_ref.fName.find(macro) != CompilerKit::STLString::npos && - macro_ref.fValue == "1") { - inactive_code = false; - defined = true; - - break; - } - } - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("warning") != CompilerKit::STLString::npos) { - auto warning_pos = hdr_line.find("warning"); - if (warning_pos == CompilerKit::STLString::npos) continue; - - auto line_after_warning = hdr_line.substr(warning_pos + strlen("warning") + 1); - CompilerKit::STLString message; - - for (auto& ch : line_after_warning) { - if (ch == '\r' || ch == '\n') { - break; - } - - message += ch; - } - - std::cout << "warn: " << message << std::endl; - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("error") != CompilerKit::STLString::npos) { - auto error_pos = hdr_line.find("error"); - if (error_pos == CompilerKit::STLString::npos) continue; - - auto line_after_warning = hdr_line.substr(error_pos + strlen("error") + 1); - CompilerKit::STLString message; - - for (auto& ch : line_after_warning) { - if (ch == '\r' || ch == '\n') { - break; - } - - message += ch; - } - - throw std::runtime_error("error: " + message); - } else if (hdr_line[0] == kMacroPrefix && - hdr_line.find("include ") != CompilerKit::STLString::npos) { - auto include_pos = hdr_line.find("include "); - if (include_pos == CompilerKit::STLString::npos) continue; - - line_after_include = hdr_line.substr(include_pos + strlen("include ")); - - kIncludeFile: - auto it = std::find(kAllIncludes.cbegin(), kAllIncludes.cend(), line_after_include); - - if (it != kAllIncludes.cend()) { - continue; - } - - CompilerKit::STLString path; - - kAllIncludes.push_back(line_after_include); - - bool enable = false; - bool not_local = false; - - for (auto& ch : line_after_include) { - if (ch == ' ') continue; - - if (ch == '<') { - not_local = true; - enable = true; - - continue; - } - - if (ch == '\"') { - not_local = false; - enable = true; - continue; - } - - if (enable) { - 
path += ch; - } - } - - if (not_local) { - bool open = false; - - if (path.ends_with('>')) { - path.erase(path.find('>')); - } - - if (path.ends_with('"')) { - path.erase(path.find('"')); - } - - for (auto& include : kIncludes) { - CompilerKit::STLString header_path = include; - header_path.push_back('/'); - header_path += path; - - std::ifstream header(header_path); - - if (!header.is_open()) continue; - - open = true; - - pp_parse_file(header, pp_out); - - break; - } - - if (!open) { - throw std::runtime_error("cppdrv: no such include file: " + path); - } - } else { - std::ifstream header(path); - - if (!header.is_open()) throw std::runtime_error("cppdrv: no such include file: " + path); - - pp_parse_file(header, pp_out); - } - } else { - std::cerr << ("cppdrv: unknown pre-processor directive, " + hdr_line) << "\n"; - continue; - } - } - } catch (const std::out_of_range& oor) { - return; - } -} - -///////////////////////////////////////////////////////////////////////////////////////// - -// @brief main entrypoint of app. 
- -///////////////////////////////////////////////////////////////////////////////////////// - -NECTAR_MODULE(GenericPreprocessorMain) { - try { - bool skip = false; - bool double_skip = false; - - Detail::pp_macro macro_1; - - macro_1.fName = "__true"; - macro_1.fValue = "1"; - - kMacros.push_back(macro_1); - - Detail::pp_macro macro_unreachable; - - macro_unreachable.fName = "__unreachable"; - macro_unreachable.fValue = "__compilerkit_unreachable"; - - kMacros.push_back(macro_unreachable); - - Detail::pp_macro macro_unused; - - macro_unused.fName = "__unused"; - macro_unused.fValue = "__compilerkit_unused"; - - kMacros.push_back(macro_unused); - - Detail::pp_macro macro_0; - - macro_0.fName = "__false"; - macro_0.fValue = "0"; - - kMacros.push_back(macro_0); - - Detail::pp_macro macro_nectar; - - macro_nectar.fName = "__NECTAR__"; - macro_nectar.fValue = "1"; - - kMacros.push_back(macro_nectar); - - Detail::pp_macro macro_lang; - - macro_lang.fName = "__ncpp"; - macro_lang.fValue = "202601L"; - - kMacros.push_back(macro_lang); - - Detail::pp_macro macro_nil; - - macro_nil.fName = "nil"; - macro_nil.fValue = "0"; - - kMacros.push_back(macro_nil); - - for (auto index = 1UL; index < argc; ++index) { - if (skip) { - skip = false; - continue; - } - - if (double_skip) { - ++index; - double_skip = false; - continue; - } - - if (argv[index][0] == '-') { - if (strcmp(argv[index], "-cpp-ver") == 0) { - printf("%s\n", - "Nectar Preprocessor Driver v1.11, (c) Amlal El Mahrouss 2024-2026 all rights " - "reserved."); - - return NECTAR_SUCCESS; - } - - if (strcmp(argv[index], "-cpp-help") == 0) { - printf("%s\n", - "Nectar Preprocessor Driver v1.11, (c) Amlal El Mahrouss 2024-2026 all rights " - "reserved."); - printf("%s\n", "-cpp-working-dir : set directory to working path."); - printf("%s\n", "-cpp-include-dir : add directory to include path."); - printf("%s\n", "-cpp-def : define a macro."); - printf("%s\n", "-cpp-ver: print the version."); - printf("%s\n", "-cpp-help: show 
help (this current command)."); - - return NECTAR_SUCCESS; - } - - if (strcmp(argv[index], "-cpp-include-dir") == 0) { - CompilerKit::STLString inc = argv[index + 1]; - - skip = true; - - kIncludes.push_back(inc); - } - - if (strcmp(argv[index], "-cpp-working-dir") == 0) { - CompilerKit::STLString inc = argv[index + 1]; - skip = true; - kWorkingDir = inc; - } - - if (strcmp(argv[index], "-cpp-def") == 0 && argv[index + 1] != nullptr && - argv[index + 2] != nullptr) { - CompilerKit::STLString macro_key = argv[index + 1]; - - CompilerKit::STLString macro_value; - bool is_string = false; - - for (int argv_find_len = 0; argv_find_len < strlen(argv[index + 2]); ++argv_find_len) { - if (!isdigit(argv[index + 2][argv_find_len])) { - is_string = true; - macro_value += "\""; - - break; - } - } - - macro_value += argv[index + 2]; - - if (is_string) macro_value += "\""; - - Detail::pp_macro macro; - macro.fName = macro_key; - macro.fValue = macro_value; - - kMacros.push_back(macro); - - double_skip = true; - } - - continue; - } - - kFiles.emplace_back(argv[index]); - } - - if (kFiles.empty()) return NECTAR_EXEC_ERROR; - - for (auto& file : kFiles) { - if (!std::filesystem::exists(file)) continue; - - std::ifstream file_descriptor(file); - std::ofstream file_descriptor_pp(file + ".pp"); - - pp_parse_file(file_descriptor, file_descriptor_pp); - } - - return NECTAR_SUCCESS; - } catch (const std::runtime_error& e) { - std::cout << e.what() << '\n'; - } - - return NECTAR_EXEC_ERROR; -} - -// Last rev 8-1-24 diff --git a/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cpp b/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cpp new file mode 100644 index 0000000..c2c17a8 --- /dev/null +++ b/src/CompilerKit/src/Preprocessors/Preprocessor+Generic.cpp @@ -0,0 +1,917 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// 
Official repository: https://github.com/nekernel-org/nectar + +/// BUGS: 0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#define kMacroPrefix '#' + +/// @author Amlal El Mahrouss (amlal@nekernel.org) +/// @file Preprocessor+Generic.cc +/// @brief Nectar Preprocessor. + +typedef Int32 (*pp_parser_fn_t)(CompilerKit::STLString& line, std::ifstream& hdr_file, + std::ofstream& pp_out); + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief Preprocessor internal types. + +///////////////////////////////////////////////////////////////////////////////////////// + +namespace Detail { +enum PPOperatorType : Int32 { + kInvalid = 0, + kEqual = 100, + kGreaterEqThan, + kLesserEqThan, + kGreaterThan, + kLesserThan, + kNotEqual, + kCount = 6, +}; + +struct pp_macro_condition final { + int32_t fType; + CompilerKit::STLString fTypeName; + + void Print() { + std::cout << "type: " << fType << "\n"; + std::cout << "type_name: " << fTypeName << "\n"; + } +}; + +struct pp_macro final { + std::vector fArgs; + CompilerKit::STLString fName; + CompilerKit::STLString fValue; + + void Print() { + std::cout << "name: " << fName << "\n"; + std::cout << "value: " << fValue << "\n"; + + for (auto& arg : fArgs) { + std::cout << "arg: " << arg << "\n"; + } + } +}; +} // namespace Detail + +static std::vector kFiles; +static std::vector kMacros; +static std::vector kIncludes; + +static CompilerKit::STLString kWorkingDir = ""; + +///////////////////////////////////////////////////////////////////////////////////////// + +// @name pp_parse_if_condition +// @brief parse #if condition + +///////////////////////////////////////////////////////////////////////////////////////// + +int32_t pp_parse_if_condition(Detail::pp_macro_condition& cond, Detail::pp_macro& macro, + bool& inactive_code, bool& defined, + CompilerKit::STLString& macro_str) { + if (cond.fType == Detail::PPOperatorType::kEqual) { + auto pos = 
macro_str.find(macro.fName); + if (pos == CompilerKit::STLString::npos) return 0; + + auto substr_macro = macro_str.substr(pos + macro.fName.size()); + + if (substr_macro.find(macro.fValue) != CompilerKit::STLString::npos) { + if (macro.fValue == "0") { + defined = false; + inactive_code = true; + + return 1; + } + + defined = true; + inactive_code = false; + + return 1; + } + } else if (cond.fType == Detail::kNotEqual) { + auto pos = macro_str.find(macro.fName); + if (pos == CompilerKit::STLString::npos) return 0; + + auto substr_macro = macro_str.substr(pos + macro.fName.size()); + + if (substr_macro.find(macro.fName) != CompilerKit::STLString::npos) { + if (substr_macro.find(macro.fValue) != CompilerKit::STLString::npos) { + defined = false; + inactive_code = true; + + return 1; + } + + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + auto pos = macro_str.find(macro.fName); + if (pos == CompilerKit::STLString::npos) return 0; + + auto substr_macro = macro_str.substr(pos + macro.fName.size()); + + CompilerKit::STLString number; + + for (auto& macro_num : kMacros) { + if (substr_macro.find(macro_num.fName) != CompilerKit::STLString::npos) { + for (size_t i = 0; i < macro_num.fValue.size(); ++i) { + if (isdigit(macro_num.fValue[i])) { + number += macro_num.fValue[i]; + } else { + number.clear(); + break; + } + } + + break; + } + } + + size_t y = 2; + + /* last try */ + for (; y < macro_str.size(); y++) { + if (isdigit(macro_str[y])) { + for (size_t x = y; x < macro_str.size(); x++) { + if (macro_str[x] == ' ') break; + + number += macro_str[x]; + } + + break; + } + } + + size_t rhs = atol(macro.fValue.c_str()); + size_t lhs = atol(number.c_str()); + + if (lhs == 0) { + number.clear(); + ++y; + + for (; y < macro_str.size(); y++) { + if (isdigit(macro_str[y])) { + for (size_t x = y; x < macro_str.size(); x++) { + if (macro_str[x] == ' ') break; + + number += macro_str[x]; + } + + break; + } + } + + lhs = atol(number.c_str()); + } + + 
if (cond.fType == Detail::PPOperatorType::kGreaterThan) { + if (lhs > rhs) { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == Detail::PPOperatorType::kGreaterEqThan) { + if (lhs >= rhs) { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == Detail::PPOperatorType::kLesserEqThan) { + if (lhs <= rhs) { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + if (cond.fType == Detail::PPOperatorType::kLesserThan) { + if (lhs < rhs) { + defined = true; + inactive_code = false; + + return 1; + } + + return 0; + } + + return 0; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief stores every included file here. + +///////////////////////////////////////////////////////////////////////////////////////// + +std::vector kAllIncludes; + +///////////////////////////////////////////////////////////////////////////////////////// + +// @name pp_parse_file +// @brief parse file to preprocess it. + +///////////////////////////////////////////////////////////////////////////////////////// + +void pp_parse_file(std::ifstream& hdr_file, std::ofstream& pp_out) { + CompilerKit::STLString hdr_line; + CompilerKit::STLString line_after_include; + + bool inactive_code = false; + bool defined = false; + + try { + while (std::getline(hdr_file, hdr_line)) { + if (inactive_code) { + if (hdr_line.find("#endif") == CompilerKit::STLString::npos) { + continue; + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("#endif") != CompilerKit::STLString::npos) { + inactive_code = false; + } + } + + if (hdr_line.find("*/") != CompilerKit::STLString::npos) { + hdr_line.erase(hdr_line.find("*/"), strlen("*/")); + } + + if (hdr_line.find("/*") != CompilerKit::STLString::npos) { + inactive_code = true; + + // get rid of comment. 
+ hdr_line.erase(hdr_line.find("/*")); + } + + if (hdr_line[0] == kMacroPrefix && hdr_line.find("endif") != CompilerKit::STLString::npos) { + if (!defined && inactive_code) { + inactive_code = false; + defined = false; + + continue; + } + + continue; + } + + if (!defined && inactive_code) { + continue; + } + + if (defined && inactive_code) { + continue; + } + + for (auto macro : kMacros) { + if (CompilerKit::ast_find_needle(hdr_line, macro.fName)) { + auto macro_pos = hdr_line.find(macro.fName); + if (macro_pos == CompilerKit::STLString::npos) continue; + + if (hdr_line.substr(macro_pos).find(macro.fName + '(') != CompilerKit::STLString::npos) { + if (!macro.fArgs.empty()) { + CompilerKit::STLString symbol_val = macro.fValue; + std::vector args; + + size_t x_arg_indx = 0; + + CompilerKit::STLString line_after_define = hdr_line; + CompilerKit::STLString str_arg; + + if (line_after_define.find("(") != CompilerKit::STLString::npos) { + line_after_define.erase(0, line_after_define.find("(") + 1); + + for (auto& subc : line_after_define) { + if (subc == ' ' || subc == '\t') continue; + + if (subc == ',' || subc == ')') { + if (str_arg.empty()) continue; + + args.push_back(str_arg); + + str_arg.clear(); + + continue; + } + + str_arg.push_back(subc); + } + } + + for (auto arg : macro.fArgs) { + if (symbol_val.find(macro.fArgs[x_arg_indx]) != CompilerKit::STLString::npos) { + symbol_val.replace(symbol_val.find(macro.fArgs[x_arg_indx]), + macro.fArgs[x_arg_indx].size(), args[x_arg_indx]); + ++x_arg_indx; + } else { + throw std::runtime_error("cppdrv: Internal error."); + } + } + + auto len = macro.fName.size(); + len += symbol_val.size(); + len += 2; // ( and ) + + auto paren_pos = hdr_line.find(")"); + if (paren_pos != CompilerKit::STLString::npos) { + hdr_line.erase(paren_pos, 1); + } + + auto macro_call_pos = hdr_line.find(macro.fName + '('); + if (macro_call_pos != CompilerKit::STLString::npos) { + hdr_line.replace(macro_call_pos, len, symbol_val); + } + } else { + auto 
value = macro.fValue; + + hdr_line.replace(hdr_line.find(macro.fName), macro.fName.size(), value); + } + } + } + } + + if (hdr_line[0] == kMacroPrefix && hdr_line.find("define ") != CompilerKit::STLString::npos) { + auto define_pos = hdr_line.find("define "); + if (define_pos == CompilerKit::STLString::npos) continue; + + auto line_after_define = hdr_line.substr(define_pos + strlen("define ")); + + CompilerKit::STLString macro_value; + CompilerKit::STLString macro_key; + + std::size_t pos = 0UL; + + std::vector args; + bool on_args = false; + + for (auto& ch : line_after_define) { + ++pos; + + if (ch == '(') { + on_args = true; + continue; + } + + if (ch == ')') { + on_args = false; + continue; + } + + if (ch == '\\') continue; + + if (on_args) continue; + + if (ch == ' ') { + for (size_t i = pos; i < line_after_define.size(); i++) { + macro_value += line_after_define[i]; + } + + break; + } + + macro_key += ch; + } + + CompilerKit::STLString str; + + if (line_after_define.find("(") != CompilerKit::STLString::npos) { + line_after_define.erase(0, line_after_define.find("(") + 1); + + for (auto& subc : line_after_define) { + if (subc == ',' || subc == ')') { + if (str.empty()) continue; + + args.push_back(str); + + str.clear(); + + continue; + } + + str.push_back(subc); + } + } + + Detail::pp_macro macro; + + macro.fArgs = args; + macro.fName = macro_key; + macro.fValue = macro_value; + + kMacros.emplace_back(macro); + + continue; + } + + if (hdr_line[0] != kMacroPrefix) { + if (inactive_code) { + continue; + } + + pp_out << hdr_line << std::endl; + + continue; + } + + if (hdr_line[0] == kMacroPrefix && hdr_line.find("ifndef") != CompilerKit::STLString::npos) { + auto ifndef_pos = hdr_line.find("ifndef"); + if (ifndef_pos == CompilerKit::STLString::npos) continue; + + auto line_after_ifndef = hdr_line.substr(ifndef_pos + strlen("ifndef") + 1); + CompilerKit::STLString macro; + + for (auto& ch : line_after_ifndef) { + if (ch == ' ') { + break; + } + + macro += ch; + } 
+ + if (macro == "0") { + defined = true; + inactive_code = false; + continue; + } + + if (macro == "1") { + defined = false; + inactive_code = true; + + continue; + } + + bool found = false; + + defined = true; + inactive_code = false; + + for (auto& macro_ref : kMacros) { + if (hdr_line.find(macro_ref.fName) != CompilerKit::STLString::npos) { + found = true; + break; + } + } + + if (found) { + defined = false; + inactive_code = true; + + continue; + } + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("else") != CompilerKit::STLString::npos) { + if (!defined && inactive_code) { + inactive_code = false; + defined = true; + + continue; + } else { + defined = false; + inactive_code = true; + + continue; + } + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("ifdef") != CompilerKit::STLString::npos) { + auto ifdef_pos = hdr_line.find("ifdef"); + if (ifdef_pos == CompilerKit::STLString::npos) continue; + + auto line_after_ifdef = hdr_line.substr(ifdef_pos + strlen("ifdef") + 1); + CompilerKit::STLString macro; + + for (auto& ch : line_after_ifdef) { + if (ch == ' ') { + break; + } + + macro += ch; + } + + if (macro == "0") { + defined = false; + inactive_code = true; + + continue; + } + + if (macro == "1") { + defined = true; + inactive_code = false; + + continue; + } + + defined = false; + inactive_code = true; + + for (auto& macro_ref : kMacros) { + if (hdr_line.find(macro_ref.fName) != CompilerKit::STLString::npos) { + defined = true; + inactive_code = false; + + break; + } + } + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("if") != CompilerKit::STLString::npos) { + inactive_code = true; + + std::vector pp_macro_condition_list = { + { + .fType = Detail::PPOperatorType::kEqual, + .fTypeName = "==", + }, + { + .fType = Detail::PPOperatorType::kNotEqual, + .fTypeName = "!=", + }, + { + .fType = Detail::PPOperatorType::kLesserThan, + .fTypeName = "<", + }, + { + .fType = Detail::PPOperatorType::kGreaterThan, + .fTypeName = ">", + }, + { + 
.fType = Detail::PPOperatorType::kLesserEqThan, + .fTypeName = "<=", + }, + { + .fType = Detail::PPOperatorType::kGreaterEqThan, + .fTypeName = ">=", + }, + }; + + int32_t good_to_go = 0; + + for (auto& macro_condition : pp_macro_condition_list) { + if (hdr_line.find(macro_condition.fTypeName) != CompilerKit::STLString::npos) { + for (auto& found_macro : kMacros) { + if (hdr_line.find(found_macro.fName) != CompilerKit::STLString::npos) { + good_to_go = pp_parse_if_condition(macro_condition, found_macro, inactive_code, + defined, hdr_line); + + break; + } + } + } + } + + if (good_to_go) continue; + + auto if_pos = hdr_line.find("if"); + if (if_pos == CompilerKit::STLString::npos) continue; + + auto line_after_if = hdr_line.substr(if_pos + strlen("if") + 1); + CompilerKit::STLString macro; + + for (auto& ch : line_after_if) { + if (ch == ' ') { + break; + } + + macro += ch; + } + + if (macro == "0") { + defined = false; + inactive_code = true; + continue; + } + + if (macro == "1") { + defined = true; + inactive_code = false; + + continue; + } + + // last try, is it defined to be one? 
+ for (auto& macro_ref : kMacros) { + if (macro_ref.fName.find(macro) != CompilerKit::STLString::npos && + macro_ref.fValue == "1") { + inactive_code = false; + defined = true; + + break; + } + } + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("warning") != CompilerKit::STLString::npos) { + auto warning_pos = hdr_line.find("warning"); + if (warning_pos == CompilerKit::STLString::npos) continue; + + auto line_after_warning = hdr_line.substr(warning_pos + strlen("warning") + 1); + CompilerKit::STLString message; + + for (auto& ch : line_after_warning) { + if (ch == '\r' || ch == '\n') { + break; + } + + message += ch; + } + + std::cout << "warn: " << message << std::endl; + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("error") != CompilerKit::STLString::npos) { + auto error_pos = hdr_line.find("error"); + if (error_pos == CompilerKit::STLString::npos) continue; + + auto line_after_warning = hdr_line.substr(error_pos + strlen("error") + 1); + CompilerKit::STLString message; + + for (auto& ch : line_after_warning) { + if (ch == '\r' || ch == '\n') { + break; + } + + message += ch; + } + + throw std::runtime_error("error: " + message); + } else if (hdr_line[0] == kMacroPrefix && + hdr_line.find("include ") != CompilerKit::STLString::npos) { + auto include_pos = hdr_line.find("include "); + if (include_pos == CompilerKit::STLString::npos) continue; + + line_after_include = hdr_line.substr(include_pos + strlen("include ")); + + kIncludeFile: + auto it = std::find(kAllIncludes.cbegin(), kAllIncludes.cend(), line_after_include); + + if (it != kAllIncludes.cend()) { + continue; + } + + CompilerKit::STLString path; + + kAllIncludes.push_back(line_after_include); + + bool enable = false; + bool not_local = false; + + for (auto& ch : line_after_include) { + if (ch == ' ') continue; + + if (ch == '<') { + not_local = true; + enable = true; + + continue; + } + + if (ch == '\"') { + not_local = false; + enable = true; + continue; + } + + if (enable) { + 
path += ch; + } + } + + if (not_local) { + bool open = false; + + if (path.ends_with('>')) { + path.erase(path.find('>')); + } + + if (path.ends_with('"')) { + path.erase(path.find('"')); + } + + for (auto& include : kIncludes) { + CompilerKit::STLString header_path = include; + header_path.push_back('/'); + header_path += path; + + std::ifstream header(header_path); + + if (!header.is_open()) continue; + + open = true; + + pp_parse_file(header, pp_out); + + break; + } + + if (!open) { + throw std::runtime_error("cppdrv: no such include file: " + path); + } + } else { + std::ifstream header(path); + + if (!header.is_open()) throw std::runtime_error("cppdrv: no such include file: " + path); + + pp_parse_file(header, pp_out); + } + } else { + std::cerr << ("cppdrv: unknown pre-processor directive, " + hdr_line) << "\n"; + continue; + } + } + } catch (const std::out_of_range& oor) { + return; + } +} + +///////////////////////////////////////////////////////////////////////////////////////// + +// @brief main entrypoint of app. 
+ +///////////////////////////////////////////////////////////////////////////////////////// + +NECTAR_MODULE(GenericPreprocessorMain) { + try { + bool skip = false; + bool double_skip = false; + + Detail::pp_macro macro_1; + + macro_1.fName = "__true"; + macro_1.fValue = "1"; + + kMacros.push_back(macro_1); + + Detail::pp_macro macro_unreachable; + + macro_unreachable.fName = "__unreachable"; + macro_unreachable.fValue = "__compilerkit_unreachable"; + + kMacros.push_back(macro_unreachable); + + Detail::pp_macro macro_unused; + + macro_unused.fName = "__unused"; + macro_unused.fValue = "__compilerkit_unused"; + + kMacros.push_back(macro_unused); + + Detail::pp_macro macro_0; + + macro_0.fName = "__false"; + macro_0.fValue = "0"; + + kMacros.push_back(macro_0); + + Detail::pp_macro macro_nectar; + + macro_nectar.fName = "__NECTAR__"; + macro_nectar.fValue = "1"; + + kMacros.push_back(macro_nectar); + + Detail::pp_macro macro_lang; + + macro_lang.fName = "__ncpp"; + macro_lang.fValue = "202601L"; + + kMacros.push_back(macro_lang); + + Detail::pp_macro macro_nil; + + macro_nil.fName = "nil"; + macro_nil.fValue = "0"; + + kMacros.push_back(macro_nil); + + for (auto index = 1UL; index < argc; ++index) { + if (skip) { + skip = false; + continue; + } + + if (double_skip) { + ++index; + double_skip = false; + continue; + } + + if (argv[index][0] == '-') { + if (strcmp(argv[index], "-cpp-ver") == 0) { + printf("%s\n", + "Nectar Preprocessor Driver v1.11, (c) Amlal El Mahrouss 2024-2026 all rights " + "reserved."); + + return NECTAR_SUCCESS; + } + + if (strcmp(argv[index], "-cpp-help") == 0) { + printf("%s\n", + "Nectar Preprocessor Driver v1.11, (c) Amlal El Mahrouss 2024-2026 all rights " + "reserved."); + printf("%s\n", "-cpp-working-dir : set directory to working path."); + printf("%s\n", "-cpp-include-dir : add directory to include path."); + printf("%s\n", "-cpp-def : define a macro."); + printf("%s\n", "-cpp-ver: print the version."); + printf("%s\n", "-cpp-help: show 
help (this current command)."); + + return NECTAR_SUCCESS; + } + + if (strcmp(argv[index], "-cpp-include-dir") == 0) { + CompilerKit::STLString inc = argv[index + 1]; + + skip = true; + + kIncludes.push_back(inc); + } + + if (strcmp(argv[index], "-cpp-working-dir") == 0) { + CompilerKit::STLString inc = argv[index + 1]; + skip = true; + kWorkingDir = inc; + } + + if (strcmp(argv[index], "-cpp-def") == 0 && argv[index + 1] != nullptr && + argv[index + 2] != nullptr) { + CompilerKit::STLString macro_key = argv[index + 1]; + + CompilerKit::STLString macro_value; + bool is_string = false; + + for (int argv_find_len = 0; argv_find_len < strlen(argv[index + 2]); ++argv_find_len) { + if (!isdigit(argv[index + 2][argv_find_len])) { + is_string = true; + macro_value += "\""; + + break; + } + } + + macro_value += argv[index + 2]; + + if (is_string) macro_value += "\""; + + Detail::pp_macro macro; + macro.fName = macro_key; + macro.fValue = macro_value; + + kMacros.push_back(macro); + + double_skip = true; + } + + continue; + } + + kFiles.emplace_back(argv[index]); + } + + if (kFiles.empty()) return NECTAR_EXEC_ERROR; + + for (auto& file : kFiles) { + if (!std::filesystem::exists(file)) continue; + + std::ifstream file_descriptor(file); + std::ofstream file_descriptor_pp(file + ".pp"); + + pp_parse_file(file_descriptor, file_descriptor_pp); + } + + return NECTAR_SUCCESS; + } catch (const std::runtime_error& e) { + std::cout << e.what() << '\n'; + } + + return NECTAR_EXEC_ERROR; +} + +// Last rev 8-1-24 diff --git a/src/DebuggerKit/dk-nekernel.json b/src/DebuggerKit/dk-nekernel.json index ca09081..4ada191 100644 --- a/src/DebuggerKit/dk-nekernel.json +++ b/src/DebuggerKit/dk-nekernel.json @@ -5,7 +5,7 @@ "../../include/DebuggerKit", "../../include" ], - "sources_path": ["src/*.cc"], + "sources_path": ["src/*.cpp"], "output_name": "/usr/local/lib/libDebuggerKit.dylib", "compiler_flags": ["-fPIC", "-shared"], "cpp_macros": [ diff --git a/src/DebuggerKit/dk-osx.json 
b/src/DebuggerKit/dk-osx.json index 6f619b7..f97f758 100644 --- a/src/DebuggerKit/dk-osx.json +++ b/src/DebuggerKit/dk-osx.json @@ -5,7 +5,7 @@ "../../include/DebuggerKit", "../../include" ], - "sources_path": ["src/*.cc"], + "sources_path": ["src/*.cpp"], "output_name": "/usr/local/lib/libDebuggerKit.dylib", "compiler_flags": ["-fPIC", "-shared"], "cpp_macros": [ diff --git a/src/DebuggerKit/src/NeKernelContract.cc b/src/DebuggerKit/src/NeKernelContract.cc deleted file mode 100644 index f39ba0d..0000000 --- a/src/DebuggerKit/src/NeKernelContract.cc +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#ifdef DK_NEKERNEL_DEBUGGER - -/// @author Amlal El Mahrouss -/// @brief Kernel Debugger Protocol - -#include -#include -#include -#include - -using namespace DebuggerKit::Detail; -using namespace DebuggerKit::NeKernel; - -NeKernelContract::NeKernelContract() = default; - -NeKernelContract::~NeKernelContract() = default; - -bool NeKernelContract::Attach(CompilerKit::STLString path, CompilerKit::STLString argv, - ProcessID& pid) noexcept { - if (path.empty() || argv.empty()) return NO; - - m_socket = ::socket(AF_INET, SOCK_STREAM, 0); - - if (m_socket == -1) return NO; - - struct sockaddr_in server_addr; - - server_addr.sin_family = AF_INET; - server_addr.sin_port = htons(kDebugPort); - - if (::inet_pton(AF_INET, argv.c_str(), &server_addr.sin_addr) <= 0) return NO; - - auto ret = (::connect(m_socket, (struct sockaddr*) &server_addr, sizeof(server_addr)) == -1); - - if (ret) return NO; - - CompilerKit::STLString pkt = Detail::kDebugMagic; - pkt += ";\r"; - - ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; - return ret; -} - -bool NeKernelContract::BreakAt(CompilerKit::STLString symbol) noexcept { - 
CompilerKit::STLString pkt = Detail::kDebugMagic; - pkt += ";SYM=\""; - pkt += symbol; - pkt += "\";\r"; - - if (pkt.size() > kDebugCmdLen) return NO; - - auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; - return ret; -} - -bool NeKernelContract::Break() noexcept { - CompilerKit::STLString pkt = Detail::kDebugMagic; - pkt += ";BRK=1;\r"; - - auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; - return ret; -} - -bool NeKernelContract::Continue() noexcept { - CompilerKit::STLString pkt = Detail::kDebugMagic; - pkt += ";CONT=1;\r"; - - auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; - return ret; - return NO; -} - -bool NeKernelContract::Detach() noexcept { - CompilerKit::STLString pkt = Detail::kDebugMagic; - pkt += ";DTCH=1;\r"; - - auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; - - if (ret) ::close(m_socket); - - return ret; -} - -#endif // DK_NEKERNEL_DEBUGGER diff --git a/src/DebuggerKit/src/NeKernelContract.cpp b/src/DebuggerKit/src/NeKernelContract.cpp new file mode 100644 index 0000000..f39ba0d --- /dev/null +++ b/src/DebuggerKit/src/NeKernelContract.cpp @@ -0,0 +1,89 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#ifdef DK_NEKERNEL_DEBUGGER + +/// @author Amlal El Mahrouss +/// @brief Kernel Debugger Protocol + +#include +#include +#include +#include + +using namespace DebuggerKit::Detail; +using namespace DebuggerKit::NeKernel; + +NeKernelContract::NeKernelContract() = default; + +NeKernelContract::~NeKernelContract() = default; + +bool NeKernelContract::Attach(CompilerKit::STLString path, CompilerKit::STLString argv, + ProcessID& pid) noexcept { + if (path.empty() || argv.empty()) return NO; + + m_socket = ::socket(AF_INET, SOCK_STREAM, 0); + + if (m_socket == -1) return NO; + + struct 
sockaddr_in server_addr; + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(kDebugPort); + + if (::inet_pton(AF_INET, argv.c_str(), &server_addr.sin_addr) <= 0) return NO; + + auto ret = (::connect(m_socket, (struct sockaddr*) &server_addr, sizeof(server_addr)) == -1); + + if (ret) return NO; + + CompilerKit::STLString pkt = Detail::kDebugMagic; + pkt += ";\r"; + + ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; + return ret; +} + +bool NeKernelContract::BreakAt(CompilerKit::STLString symbol) noexcept { + CompilerKit::STLString pkt = Detail::kDebugMagic; + pkt += ";SYM=\""; + pkt += symbol; + pkt += "\";\r"; + + if (pkt.size() > kDebugCmdLen) return NO; + + auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; + return ret; +} + +bool NeKernelContract::Break() noexcept { + CompilerKit::STLString pkt = Detail::kDebugMagic; + pkt += ";BRK=1;\r"; + + auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; + return ret; +} + +bool NeKernelContract::Continue() noexcept { + CompilerKit::STLString pkt = Detail::kDebugMagic; + pkt += ";CONT=1;\r"; + + auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; + return ret; + return NO; +} + +bool NeKernelContract::Detach() noexcept { + CompilerKit::STLString pkt = Detail::kDebugMagic; + pkt += ";DTCH=1;\r"; + + auto ret = ::send(m_socket, pkt.data(), pkt.size(), 0) > 0; + + if (ret) ::close(m_socket); + + return ret; +} + +#endif // DK_NEKERNEL_DEBUGGER diff --git a/src/DebuggerKit/src/NeKernelContractCLI.cc b/src/DebuggerKit/src/NeKernelContractCLI.cc deleted file mode 100644 index e1b6a27..0000000 --- a/src/DebuggerKit/src/NeKernelContractCLI.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#ifdef DK_NEKERNEL_DEBUGGER - -#include 
-#include -#include -#include - -#include - -using namespace DebuggerKit::NeKernel; - -static void dbgi_ctrlc_handler(std::int32_t _) { - if (!kPID || kPath.empty()) { - return; - } - - kKernelDebugger.Break(); - - pfd::notify("Debugger Event", "Breakpoint hit!"); - - kKeepRunning = false; -} - -NECTAR_MODULE(DebuggerNeKernel) { - pfd::notify("Debugger Event", - "Nectar Debugger\n(C) 2025 Amlal El Mahrouss and NeKernel.org contributors, all " - "rights reserved."); - - if (argc >= 5 && std::string(argv[1]) == "-k" && argv[2] != nullptr && - std::string(argv[3]) == "-ip" && argv[4] != nullptr) { - kPath = argv[2]; - kPath += ":"; - kPath += argv[4]; - - kStdOut << "[+] KIP (Kernel:IP) set to: " << kPath << "\n"; - - CompilerKit::install_signal(SIGINT, dbgi_ctrlc_handler); - - kKernelDebugger.Attach(kPath, argv[4], kPID); - - while (YES) { - if (kKeepRunning) { - continue; - } - - std::string cmd; - if (!std::getline(std::cin, cmd)) break; - - if (cmd == "c" || cmd == "cont" || cmd == "continue") { - if (kKernelDebugger.Continue()) { - kKeepRunning = true; - - kStdOut << "[+] Continuing...\n"; - - pfd::notify("Debugger Event", "Continuing..."); - } - } - - if (cmd == "d" || cmd == "detach") kKernelDebugger.Detach(); - - if (cmd == "start") { - kStdOut << "[?] Enter a argument to use: "; - std::getline(std::cin, cmd); - - kKernelDebugger.Attach(kPath, cmd, kPID); - } - - if (cmd == "exit") { - if (kPID > 0) kKernelDebugger.Detach(); - - break; - } - - if (cmd == "break" || cmd == "b") { - kStdOut << "[?] 
Enter a symbol to break on: "; - - std::getline(std::cin, cmd); - - if (kKernelDebugger.BreakAt(cmd)) { - pfd::notify("Debugger Event", "Add BreakAt at: " + cmd); - } - } - } - - return EXIT_SUCCESS; - } - - kStdOut << "usage: " << argv[0] << " -k -ip \n"; - kStdOut << "example: " << argv[0] << " -k /path/to/ne_kernel -ip 127.0.0.1\n"; - - return EXIT_FAILURE; -} - -#endif // DK_NEKERNEL_DEBUGGER diff --git a/src/DebuggerKit/src/NeKernelContractCLI.cpp b/src/DebuggerKit/src/NeKernelContractCLI.cpp new file mode 100644 index 0000000..e1b6a27 --- /dev/null +++ b/src/DebuggerKit/src/NeKernelContractCLI.cpp @@ -0,0 +1,99 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#ifdef DK_NEKERNEL_DEBUGGER + +#include +#include +#include +#include + +#include + +using namespace DebuggerKit::NeKernel; + +static void dbgi_ctrlc_handler(std::int32_t _) { + if (!kPID || kPath.empty()) { + return; + } + + kKernelDebugger.Break(); + + pfd::notify("Debugger Event", "Breakpoint hit!"); + + kKeepRunning = false; +} + +NECTAR_MODULE(DebuggerNeKernel) { + pfd::notify("Debugger Event", + "Nectar Debugger\n(C) 2025 Amlal El Mahrouss and NeKernel.org contributors, all " + "rights reserved."); + + if (argc >= 5 && std::string(argv[1]) == "-k" && argv[2] != nullptr && + std::string(argv[3]) == "-ip" && argv[4] != nullptr) { + kPath = argv[2]; + kPath += ":"; + kPath += argv[4]; + + kStdOut << "[+] KIP (Kernel:IP) set to: " << kPath << "\n"; + + CompilerKit::install_signal(SIGINT, dbgi_ctrlc_handler); + + kKernelDebugger.Attach(kPath, argv[4], kPID); + + while (YES) { + if (kKeepRunning) { + continue; + } + + std::string cmd; + if (!std::getline(std::cin, cmd)) break; + + if (cmd == "c" || cmd == "cont" || cmd == "continue") { + if (kKernelDebugger.Continue()) { + 
kKeepRunning = true; + + kStdOut << "[+] Continuing...\n"; + + pfd::notify("Debugger Event", "Continuing..."); + } + } + + if (cmd == "d" || cmd == "detach") kKernelDebugger.Detach(); + + if (cmd == "start") { + kStdOut << "[?] Enter a argument to use: "; + std::getline(std::cin, cmd); + + kKernelDebugger.Attach(kPath, cmd, kPID); + } + + if (cmd == "exit") { + if (kPID > 0) kKernelDebugger.Detach(); + + break; + } + + if (cmd == "break" || cmd == "b") { + kStdOut << "[?] Enter a symbol to break on: "; + + std::getline(std::cin, cmd); + + if (kKernelDebugger.BreakAt(cmd)) { + pfd::notify("Debugger Event", "Add BreakAt at: " + cmd); + } + } + } + + return EXIT_SUCCESS; + } + + kStdOut << "usage: " << argv[0] << " -k -ip \n"; + kStdOut << "example: " << argv[0] << " -k /path/to/ne_kernel -ip 127.0.0.1\n"; + + return EXIT_FAILURE; +} + +#endif // DK_NEKERNEL_DEBUGGER diff --git a/src/DebuggerKit/src/POSIXMachContractCLI.cc b/src/DebuggerKit/src/POSIXMachContractCLI.cc deleted file mode 100644 index dd1411c..0000000 --- a/src/DebuggerKit/src/POSIXMachContractCLI.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) -// Licensed under the Apache License, Version 2.0 (See accompanying -// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) -// Official repository: https://github.com/nekernel-org/nectar - -#ifdef DK_MACH_DEBUGGER - -#include -#include -#include - -#ifdef DK_MACH_DEBUGGER -#include - -/// @internal -/// @brief Handles CTRL-C signal on debugger. 
-static void dbgi_ctrlc_handler(std::int32_t _) { - if (!kPID) { - return; - } - - kUserDebugger.Break(); - - pfd::notify("Debugger Event", "Breakpoint hit!"); - - kKeepRunning = false; -} - -NECTAR_MODULE(DebuggerMachPOSIX) { - pfd::notify( - "Debugger Event", - "Userland Debugger\n(C) 2025 Amlal El Mahrouss, licensed under the Apache 2.0 license."); - - if (argc >= 3 && std::string(argv[1]) == "-p" && argv[2] != nullptr) { - kPath = argv[2]; - kUserDebugger.SetPath(kPath); - - kStdOut << "[+] Image set to: " << kPath << "\n"; - } else { - kStdOut << "usage: " << argv[0] << " -p \n"; - kStdOut << "example: " << argv[0] << " -p \n"; - - return EXIT_FAILURE; - } - - CompilerKit::install_signal(SIGINT, dbgi_ctrlc_handler); - - while (YES) { - if (kKeepRunning) { - continue; - } - - std::string cmd; - if (!std::getline(std::cin, cmd)) break; - - if (cmd == "c" || cmd == "cont" || cmd == "continue") { - if (kUserDebugger.Continue()) { - kKeepRunning = true; - - kStdOut << "[+] Continuing...\n"; - - pfd::notify("Debugger Event", "Continuing..."); - } - } - - if (cmd == "d" || cmd == "detach") kUserDebugger.Detach(); - - if (cmd == "start") { - kStdOut << "[?] Enter a argument to use: "; - std::getline(std::cin, cmd); - - kUserDebugger.Attach(kPath, cmd, kPID); - } - - if (cmd == "exit") { - if (kPID > 0) kUserDebugger.Detach(); - - break; - } - - if (cmd == "break" || cmd == "b") { - kStdOut << "[?] 
Enter a symbol to break on: "; - - std::getline(std::cin, cmd); - - if (kUserDebugger.BreakAt(cmd)) { - pfd::notify("Debugger Event", "Add BreakAt at: " + cmd); - } - } - } - - return EXIT_SUCCESS; -} -#endif - -#endif diff --git a/src/DebuggerKit/src/POSIXMachContractCLI.cpp b/src/DebuggerKit/src/POSIXMachContractCLI.cpp new file mode 100644 index 0000000..dd1411c --- /dev/null +++ b/src/DebuggerKit/src/POSIXMachContractCLI.cpp @@ -0,0 +1,96 @@ +// Copyright 2024-2025, Amlal El Mahrouss (amlal@nekernel.org) +// Licensed under the Apache License, Version 2.0 (See accompanying +// file LICENSE or copy at http://www.apache.org/licenses/LICENSE-2.0) +// Official repository: https://github.com/nekernel-org/nectar + +#ifdef DK_MACH_DEBUGGER + +#include +#include +#include + +#ifdef DK_MACH_DEBUGGER +#include + +/// @internal +/// @brief Handles CTRL-C signal on debugger. +static void dbgi_ctrlc_handler(std::int32_t _) { + if (!kPID) { + return; + } + + kUserDebugger.Break(); + + pfd::notify("Debugger Event", "Breakpoint hit!"); + + kKeepRunning = false; +} + +NECTAR_MODULE(DebuggerMachPOSIX) { + pfd::notify( + "Debugger Event", + "Userland Debugger\n(C) 2025 Amlal El Mahrouss, licensed under the Apache 2.0 license."); + + if (argc >= 3 && std::string(argv[1]) == "-p" && argv[2] != nullptr) { + kPath = argv[2]; + kUserDebugger.SetPath(kPath); + + kStdOut << "[+] Image set to: " << kPath << "\n"; + } else { + kStdOut << "usage: " << argv[0] << " -p \n"; + kStdOut << "example: " << argv[0] << " -p \n"; + + return EXIT_FAILURE; + } + + CompilerKit::install_signal(SIGINT, dbgi_ctrlc_handler); + + while (YES) { + if (kKeepRunning) { + continue; + } + + std::string cmd; + if (!std::getline(std::cin, cmd)) break; + + if (cmd == "c" || cmd == "cont" || cmd == "continue") { + if (kUserDebugger.Continue()) { + kKeepRunning = true; + + kStdOut << "[+] Continuing...\n"; + + pfd::notify("Debugger Event", "Continuing..."); + } + } + + if (cmd == "d" || cmd == "detach") 
kUserDebugger.Detach(); + + if (cmd == "start") { + kStdOut << "[?] Enter a argument to use: "; + std::getline(std::cin, cmd); + + kUserDebugger.Attach(kPath, cmd, kPID); + } + + if (cmd == "exit") { + if (kPID > 0) kUserDebugger.Detach(); + + break; + } + + if (cmd == "break" || cmd == "b") { + kStdOut << "[?] Enter a symbol to break on: "; + + std::getline(std::cin, cmd); + + if (kUserDebugger.BreakAt(cmd)) { + pfd::notify("Debugger Event", "Add BreakAt at: " + cmd); + } + } + } + + return EXIT_SUCCESS; +} +#endif + +#endif -- cgit v1.2.3