From 3b30e6eaabd1f62c71acb4c50df3395231ef26d5 Mon Sep 17 00:00:00 2001 From: Amlal El Mahrouss Date: Mon, 14 Oct 2024 14:27:40 +0200 Subject: IMP: Reverted changes, remove camel_2_snake.py Signed-off-by: Amlal El Mahrouss --- camel_2_snake.py | 219 ------------------------------------------------------- run_format.sh | 3 - 2 files changed, 222 deletions(-) delete mode 100755 camel_2_snake.py diff --git a/camel_2_snake.py b/camel_2_snake.py deleted file mode 100755 index 5a652324..00000000 --- a/camel_2_snake.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python3 - -# This script changes variables names in C++ files from -# camelCase style to snake_case style, with corresponding -# naming conventions observed (e.g. "bIsFoo" => "is_foo") -# -# -# Limitations caused by lack of syntax awareness: -# a. despite best efforts (see besteffort_* below), in corner cases it leaves some -# variables initialized by ()-style expression untouched if the variable does -# not end with an underscore, as it thinks the variable is a function: -# what it does: variableName(initArgument) => variableName(init_argument) -# it should do: variableName(initArgument) => variable_name(init_argument) -# b. it replaces function name if it is used as a pointer, as it thinks the function -# is a variable: -# what it does: int (*funcPtr)(int, int) = &addInts; => int (*func_ptr)(int, int) = &add_ints; -# it should do: int (*funcPtr)(int, int) = &addInts; => int (*func_ptr)(int, int) = &addInts; -# c. it does not check name collision: -# what it does: bool isGreek, bIsGreek; => bool is_greek, is_greek; -# it should do: bool isGreek, bIsGreek; => bool is_greek, is_greek_2; - -import os, sys -import re -import argparse - -REGEX_PIECE_1 = r"(\A|(?<=\W))([a-jl-z]|[a-z]{2,})([A-Z][a-z]*|[0-9]+)+_?(?=[^\w\(]|$)" -REGEX_PIECE_2 = r"(\A|(?<=\W))([a-jl-z]|[a-z]{2,})([A-Z][a-z]*|[0-9]+)+_(?=\()" -REGEX_PIECE_IN_CTOR_INIT = r"(\A|(?<=\)\s:\s|\S\),\s)|(?<=\A:\s|\s\s))[a-z]+([A-Z][a-z]*|[0-9]+)+(?=\()" -DROMEDARY_CAMEL_CASE_VAR = re.compile(REGEX_PIECE_1 + "|" + REGEX_PIECE_2) -DROMEDARY_CAMEL_CASE_VAR_IN_CTOR_INIT = re.compile( - REGEX_PIECE_1 + "|" + REGEX_PIECE_2 + "|" + REGEX_PIECE_IN_CTOR_INIT) - -BOOLEAN_PREFIXES = [ - "is", "are", "was", "were", - "has", "have", "had", - "does", "do", "did", "done", - "find", "found", "get", "got" -] -COMMON_ABBREVIATIONS = { - # NOTE no "obj", "num", "it", "iter", "var", "src", "dest", - # "ret", "init", "ptr", "op", "db" - "res" : "result", "buf" : "buffer", "vec" : "vector", "msg" : "message", - "seq" : "sequence", "cnt" : "count", "mem" : "memory", "val" : "value", - "loc" : "location", "ans" : "answer", "ctx" : "context", "elem" : "element", - "ty" : "type", -} - -CAMEL_CASE_PIECE_REGEX = re.compile(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|[0-9]|_|$)|[0-9]+|\_") -def compute_snake_case(camel_case, testing=False): - splitted_words = CAMEL_CASE_PIECE_REGEX.findall(camel_case) - if testing: - print("%-15s => %s" % (camel_case, splitted_words)) - splitted_words = list(map(lambda w : w.lower(), splitted_words)) - ends_with_underscore = False - if splitted_words[-1] == '_': - splitted_words = splitted_words[:-1] - ends_with_underscore = True - assert len(splitted_words) >= 2 - # special rules in conversion: observe naming conventions for snake_case - had_hungarian_prefix = False - if ((splitted_words[0] == "p" or splitted_words[0] == "m" - or splitted_words[0] == "n" or splitted_words[0] == "f") - and splitted_words[1].isalpha()): - had_hungarian_prefix = True - splitted_words = splitted_words[1:] - if splitted_words[0] == "b" and splitted_words[1].isalpha(): - if splitted_words[1] in BOOLEAN_PREFIXES: - splitted_words = splitted_words[1:] - else: - splitted_words = [ "is" ] + splitted_words[1:] - if splitted_words[0] == "it": - splitted_words = [ "iter" ] + splitted_words[1:] - if splitted_words[-1] == "num": - splitted_words = splitted_words[:-1] + [ "number" ] - splitted_words = [ COMMON_ABBREVIATIONS.get(w, w) for w in splitted_words ] - # make snake_case - if not ((len(splitted_words) >= 1 if had_hungarian_prefix else len(splitted_words) >= 2) - and splitted_words[0].isalpha()): - raise RuntimeError("'%s' => %s" % (camel_case, splitted_words)) - snake_case = '_'.join(splitted_words) + ('_' if ends_with_underscore else '') - return snake_case - -MAX_LOOP_STEPS = 16 # unlikely to have more than this number of camelCase variables in one line -def process_one_line(old_line, in_ctor_init_list=False, testing=False): - step_count, instance_count = 0, 0 - line = old_line - while True: - step_count += 1 - if step_count > MAX_LOOP_STEPS: - raise RuntimeError("maximum loop steps (%d) exceeded, line:\n%s" % ( - MAX_LOOP_STEPS, old_line)) - regex_obj = DROMEDARY_CAMEL_CASE_VAR - if in_ctor_init_list: - regex_obj = DROMEDARY_CAMEL_CASE_VAR_IN_CTOR_INIT - matchObj = re.search(regex_obj, line) - if not matchObj: - return line, instance_count - camel_case_var = matchObj.group(0) - camel_case_var_start, camel_case_var_end = matchObj.start(), matchObj.end() - instance_count += 1 - snake_case_var = compute_snake_case(camel_case_var, testing) - line = line[:camel_case_var_start] + snake_case_var + line[camel_case_var_end:] - -def process_one_file(filepath, handling_dir, echo, rewrite): - with open(filepath, 'r') as f: - raw_lines = f.readlines() - instance_count, new_lines = 0, [] - if echo: - print() # newline - # best effort of determing whether "foo(a)" is a ()-style initialization or a function call - besteffort_in_ctor_init_list = False - for i, raw_line in enumerate(raw_lines): - old_line = raw_line.rstrip() - ### check if in ctor initializer list (1) - besteffort_index_of_colon = old_line.find(": ") # -1 if absent - if besteffort_index_of_colon >= 0: - if ((besteffort_index_of_colon >= 2 - and old_line[besteffort_index_of_colon - 2:besteffort_index_of_colon] == ") " - and " ?" not in old_line[:besteffort_index_of_colon]) - or (i >= 1 and len(old_line[:besteffort_index_of_colon].strip()) == 0 - and raw_lines[i - 1].rstrip().endswith(")") - and (" ?" not in raw_lines[i - 1]))): - besteffort_in_ctor_init_list = True - ### essential works - new_line, instance_count_in_line = process_one_line( - old_line, besteffort_in_ctor_init_list) - if echo: - ctor_init_list_mark = "\x1b[48;5;237m" if besteffort_in_ctor_init_list else "\x1b[0m" - if instance_count_in_line > 0: - print("-|\x1b[0m" + ctor_init_list_mark - + "\x1b[35m" + old_line + "\x1b[0m") - print("+|\x1b[0m" + ctor_init_list_mark - + "\x1b[32m" + new_line + "\x1b[0m") - else: - print(" |\x1b[0m" + ctor_init_list_mark + old_line + "\x1b[0m") - new_lines.append(new_line) - instance_count += instance_count_in_line - ### check if in ctor initializer list (2) - if besteffort_in_ctor_init_list: - besteffort_index_of_open_brace = old_line.find(" {") # -1 if absent - if besteffort_index_of_open_brace >= 0: - besteffort_in_ctor_init_list = False - if rewrite and instance_count: - with open(filepath, 'w') as f: - f.write('\n'.join(new_lines) + "\n\n") - if not rewrite and not echo and not handling_dir: - sys.stderr.write("--- begin : %s ---\n" % filepath) - sys.stdout.write('\n'.join(new_lines) + "\n\n") - sys.stderr.write("--- end : %s ---\n" % filepath) - return instance_count - -def is_c_cxx(filename): - if (filename.endswith(".h") or filename.endswith(".cc") - or filename.endswith(".cpp") or filename.endswith(".c")): - return True - return False - -def work(args): - if args.test != None: - new_line, _ = process_one_line(args.test, testing=True) - print("\x1b[32;m" + new_line + "\x1b[0m") - return - files_to_read, handling_dir = [], False - if os.path.isfile(args.path): - files_to_read = [ args.path ] - else: - assert os.path.isdir(args.path) - handling_dir = True - for (dirpath, dirnames, filenames) in os.walk(args.path, followlinks=True): - for filename in filter(lambda f : is_c_cxx(f), filenames): - if (((os.sep + "test-inputs") in dirpath) - or ((os.sep + "third-party") in dirpath) - or ((os.sep + "linters") in dirpath)): - continue - files_to_read.append(os.path.join(dirpath, filename)) - processed_instances_sum = 0 - for filepath in files_to_read: - sys.stderr.write("%s .." % (filepath)) - sys.stderr.flush() - processed_instances = process_one_file( - filepath, handling_dir=handling_dir, echo=args.echo, rewrite=args.rewrite) - processed_instances_sum += processed_instances - sys.stderr.write("\r%s count: %d\n" % (filepath, processed_instances)) - sys.stderr.flush() - if not args.rewrite: - if handling_dir: - sys.stderr.write("file count: %d, instance count: %d\n" % ( - len(files_to_read), processed_instances_sum)) - sys.stderr.write("\nTo rewrite files, use '--rewrite'; to echo lines, use '--echo'\n") - return 0 - -def main(): - parser = argparse.ArgumentParser(description="C/C++ vairable name camelCase => snake_case") - parser.add_argument("path", nargs='?', default=None, - help="file or directory") - parser.add_argument("--rewrite", action="store_true", - help="rewrite visited C/C++ files [[caution advised]]") - parser.add_argument("-e", "--echo", action='store_true', - help="echo each line, before and after") - parser.add_argument("-t", "--test", metavar="\"..\"", type=str, default=None, - help="(dev) test one line in \"..\"") - args = parser.parse_args() - has_error = False - if args.test == None and args.path == None: - has_error = True - sys.stderr.write("[Error] you need to give argument 'path' or use option '--test'\n") - if args.test != None and (args.path != None or args.rewrite or args.echo): - has_error = True - sys.stderr.write("[Error] '--test \"..\"' can only be used alone, but you gave something else too\n") - if args.test == None and (args.rewrite or args.echo) and args.path == None: - has_error = True - sys.stderr.write("[Error] you need to give the path argument while using '--rewrite' or '--echo'\n") - if args.path != None and not os.path.exists(args.path): - has_error = True - sys.stderr.write("[Error] not found: %s\n" % args.path) - return 1 if has_error else work(args) - -if __name__ == "__main__": - sys.exit(main()) diff --git a/run_format.sh b/run_format.sh index 7e7189a0..bd8dc9bf 100755 --- a/run_format.sh +++ b/run_format.sh @@ -15,6 +15,3 @@ echo -e "Files found to format = \n\"\"\"\n$FILE_LIST\n\"\"\"" # of as a new-line separated list of *many* file names! clang-format --verbose -i --style=file $FILE_LIST -foreach i ( $FILE_LIST ) - ./camel_2_snake.py i -end -- cgit v1.2.3