summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAmlal El Mahrouss <amlal.elmahrouss@icloud.com>2024-10-14 14:18:44 +0200
committerAmlal El Mahrouss <amlal.elmahrouss@icloud.com>2024-10-14 14:18:44 +0200
commitd24165ef3361e00036a96ddb2ee706a7e61d3def (patch)
treeef65cbb691bc2de762a7f02855c1795b5837d2ab
parent5d09dd3d4cb414fa0ab86bc05c743fb224121287 (diff)
IMP: Add Python script.
Signed-off-by: Amlal El Mahrouss <amlal.elmahrouss@icloud.com>
-rwxr-xr-xcamel_2_snake.py219
-rwxr-xr-xrun_format.sh4
2 files changed, 223 insertions, 0 deletions
diff --git a/camel_2_snake.py b/camel_2_snake.py
new file mode 100755
index 00000000..5a652324
--- /dev/null
+++ b/camel_2_snake.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python3
+
+# This script changes variable names in C++ files from
+# camelCase style to snake_case style, with corresponding
+# naming conventions observed (e.g. "bIsFoo" => "is_foo")
+#
+#
+# Limitations caused by lack of syntax awareness:
+# a. despite best efforts (see besteffort_* below), in corner cases it leaves some
+# variables initialized by ()-style expression untouched if the variable does
+# not end with an underscore, as it thinks the variable is a function:
+# what it does: variableName(initArgument) => variableName(init_argument)
+# it should do: variableName(initArgument) => variable_name(init_argument)
+# b. it replaces function name if it is used as a pointer, as it thinks the function
+# is a variable:
+# what it does: int (*funcPtr)(int, int) = &addInts; => int (*func_ptr)(int, int) = &add_ints;
+# it should do: int (*funcPtr)(int, int) = &addInts; => int (*func_ptr)(int, int) = &addInts;
+# c. it does not check name collision:
+# what it does: bool isGreek, bIsGreek; => bool is_greek, is_greek;
+# it should do: bool isGreek, bIsGreek; => bool is_greek, is_greek_2;
+
+import os, sys
+import re
+import argparse
+
# Zero-width start anchor: a candidate may only begin at the start of the
# line or directly after a non-word character.
_IDENTIFIER_START = r"(\A|(?<=\W))"
# Shared camelCase core: a lowercase head (either one letter other than "k",
# or two or more letters) followed by one or more capitalised words or digit
# runs.
# NOTE(review): the single-letter head excludes "k", presumably so that
# kConstantName-style identifiers are left alone -- confirm.
_CAMEL_CASE_CORE = (_IDENTIFIER_START
                    + r"([a-jl-z]|[a-z]{2,})([A-Z][a-z]*|[0-9]+)+")

# Identifier with an optional trailing underscore that is NOT followed by a
# word character or "(" (i.e. it does not look like a call).
REGEX_PIECE_1 = _CAMEL_CASE_CORE + r"_?(?=[^\w\(]|$)"
# Identifier with a mandatory trailing underscore directly followed by "(".
REGEX_PIECE_2 = _CAMEL_CASE_CORE + r"_(?=\()"
# Identifier directly followed by "(" whose left context is the start of the
# line, ") : ", "<non-space>), ", a leading ": ", or two whitespace characters.
# Only used while the caller believes it is inside a ctor initializer list.
REGEX_PIECE_IN_CTOR_INIT = r"(\A|(?<=\)\s:\s|\S\),\s)|(?<=\A:\s|\s\s))[a-z]+([A-Z][a-z]*|[0-9]+)+(?=\()"

DROMEDARY_CAMEL_CASE_VAR = re.compile(
    "|".join((REGEX_PIECE_1, REGEX_PIECE_2)))
DROMEDARY_CAMEL_CASE_VAR_IN_CTOR_INIT = re.compile(
    "|".join((REGEX_PIECE_1, REGEX_PIECE_2, REGEX_PIECE_IN_CTOR_INIT)))
+
# Words that already read as a boolean predicate; a "b" prefix in front of one
# of these is simply dropped instead of being replaced by "is".
BOOLEAN_PREFIXES = [
    "is", "are", "was", "were",
    "has", "have", "had",
    "does", "do", "did", "done",
    "find", "found", "get", "got",
]
# Abbreviated word => spelled-out word, applied to every word of the name.
COMMON_ABBREVIATIONS = {
    # NOTE no "obj", "num", "it", "iter", "var", "src", "dest",
    #      "ret", "init", "ptr", "op", "db"
    "res": "result", "buf": "buffer", "vec": "vector", "msg": "message",
    "seq": "sequence", "cnt": "count", "mem": "memory", "val": "value",
    "loc": "location", "ans": "answer", "ctx": "context", "elem": "element",
    "ty": "type",
}

# Splits an identifier into: capitalised/lowercase words, runs of capitals
# (acronyms) that end before another capital, digit, "_" or the end, digit
# runs, and single underscores.
CAMEL_CASE_PIECE_REGEX = re.compile(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|[0-9]|_|$)|[0-9]+|\_")

# Single-letter prefixes that are dropped when followed by an alphabetic word.
_HUNGARIAN_PREFIXES = ("p", "m", "n", "f")


def compute_snake_case(camel_case, testing=False):
    """Convert one camelCase identifier to snake_case.

    Besides lower-casing and joining the words with "_", the following naming
    rules are applied, in this order:

    * a leading "p", "m", "n" or "f" followed by an alphabetic word is dropped;
    * a leading "b" followed by an alphabetic word is dropped when that word is
      in BOOLEAN_PREFIXES, otherwise it is replaced by "is";
    * a leading "it" becomes "iter";
    * a trailing "num" becomes "number";
    * every word found in COMMON_ABBREVIATIONS is spelled out.

    A single trailing underscore on the input is preserved on the output.

    Args:
        camel_case: the identifier to convert; it must split into at least two
            pieces (not counting a trailing underscore).
        testing: when true, print the identifier and its raw pieces.

    Returns:
        The snake_case spelling of the identifier.

    Raises:
        RuntimeError: if the identifier has fewer than two pieces, or the
            converted name would be too short or not start with a letter.
    """
    pieces = CAMEL_CASE_PIECE_REGEX.findall(camel_case)
    if testing:
        print("%-15s => %s" % (camel_case, pieces))
    words = [piece.lower() for piece in pieces]

    ends_with_underscore = bool(words) and words[-1] == '_'
    if ends_with_underscore:
        words = words[:-1]
    # Explicit check instead of `assert`: it must survive `python -O`, and it
    # also covers the empty-input case that used to raise IndexError.
    if len(words) < 2:
        raise RuntimeError("'%s' => %s" % (camel_case, words))

    # special rules in conversion: observe naming conventions for snake_case
    had_hungarian_prefix = False
    if words[0] in _HUNGARIAN_PREFIXES and words[1].isalpha():
        had_hungarian_prefix = True
        words = words[1:]
    # After dropping a prefix only one word may be left (e.g. "pB"), so the
    # length has to be re-checked before looking at words[1].
    if len(words) >= 2 and words[0] == "b" and words[1].isalpha():
        if words[1] in BOOLEAN_PREFIXES:
            words = words[1:]
        else:
            words = ["is"] + words[1:]
    if words[0] == "it":
        words = ["iter"] + words[1:]
    if words[-1] == "num":
        words = words[:-1] + ["number"]
    words = [COMMON_ABBREVIATIONS.get(word, word) for word in words]

    # make snake_case
    minimum_words = 1 if had_hungarian_prefix else 2
    if len(words) < minimum_words or not words[0].isalpha():
        raise RuntimeError("'%s' => %s" % (camel_case, words))
    return '_'.join(words) + ('_' if ends_with_underscore else '')
+
# Upper bound on replacements per line; it mainly guards against an endless
# loop should a replacement ever match the pattern again.
MAX_LOOP_STEPS = 16  # unlikely to have more than this number of camelCase variables in one line


def process_one_line(old_line, in_ctor_init_list=False, testing=False):
    """Rename every camelCase variable found in one line of text.

    The line is searched repeatedly from its start; each hit is replaced by
    the result of compute_snake_case() until nothing matches any more.

    Args:
        old_line: the line to process.
        in_ctor_init_list: when true, also match names directly followed by
            "(" (DROMEDARY_CAMEL_CASE_VAR_IN_CTOR_INIT is used).
        testing: forwarded to compute_snake_case().

    Returns:
        A (new_line, instance_count) tuple, where instance_count is the number
        of replacements made.

    Raises:
        RuntimeError: if more than MAX_LOOP_STEPS replacements would be needed.
    """
    # The pattern does not change while the line is processed: pick it once.
    if in_ctor_init_list:
        pattern = DROMEDARY_CAMEL_CASE_VAR_IN_CTOR_INIT
    else:
        pattern = DROMEDARY_CAMEL_CASE_VAR
    line, instance_count = old_line, 0
    while True:
        match = pattern.search(line)
        if match is None:
            return line, instance_count
        # Check the limit only once another replacement is really needed, so a
        # line with exactly MAX_LOOP_STEPS names is still processed.
        if instance_count >= MAX_LOOP_STEPS:
            raise RuntimeError("maximum loop steps (%d) exceeded, line:\n%s" % (
                MAX_LOOP_STEPS, old_line))
        snake_case_var = compute_snake_case(match.group(0), testing)
        line = line[:match.start()] + snake_case_var + line[match.end():]
        instance_count += 1
+
def _opens_ctor_init_list(raw_lines, index, old_line):
    """Best-effort guess whether old_line starts a ctor initializer list.

    True when the line contains ": " and either
    (1) the text before it ends with ") " and holds no " ?", or
    (2) the text before it is blank, the previous line ends with ")" and the
        previous line holds no " ?".
    """
    colon_at = old_line.find(": ")  # -1 if absent
    if colon_at < 0:
        return False
    before_colon = old_line[:colon_at]
    if before_colon.endswith(") ") and " ?" not in before_colon:
        return True
    if index < 1 or before_colon.strip():
        return False
    previous_line = raw_lines[index - 1]
    return previous_line.rstrip().endswith(")") and " ?" not in previous_line


def process_one_file(filepath, handling_dir, echo, rewrite):
    """Rename camelCase variables in one file and report how many were found.

    Args:
        filepath: path of the file to read (and to overwrite when `rewrite`).
        handling_dir: true when the file is one of many found in a directory;
            suppresses the dump of the converted text to stdout.
        echo: print every line, showing old and new text for changed lines.
        rewrite: write the converted text back when anything was renamed.

    Returns:
        The number of renamed instances in the file.
    """
    # newline='' keeps the original line terminators so they can be written
    # back untouched.
    with open(filepath, 'r', newline='') as f:
        raw_lines = f.readlines()
    instance_count, new_lines, line_tails = 0, [], []
    if echo:
        print()  # newline
    # best effort of determining whether "foo(a)" is a ()-style initialization
    # or a function call
    besteffort_in_ctor_init_list = False
    for i, raw_line in enumerate(raw_lines):
        old_line = raw_line.rstrip()
        ### check if in ctor initializer list (1)
        if _opens_ctor_init_list(raw_lines, i, old_line):
            besteffort_in_ctor_init_list = True
        ### essential works
        new_line, instance_count_in_line = process_one_line(
            old_line, besteffort_in_ctor_init_list)
        if echo:
            ctor_init_list_mark = "\x1b[48;5;237m" if besteffort_in_ctor_init_list else "\x1b[0m"
            if instance_count_in_line > 0:
                print("-|\x1b[0m" + ctor_init_list_mark
                      + "\x1b[35m" + old_line + "\x1b[0m")
                print("+|\x1b[0m" + ctor_init_list_mark
                      + "\x1b[32m" + new_line + "\x1b[0m")
            else:
                print(" |\x1b[0m" + ctor_init_list_mark + old_line + "\x1b[0m")
        new_lines.append(new_line)
        # Remember the trailing whitespace / line terminator removed by
        # rstrip() so that a rewrite changes nothing but the names.
        line_tails.append(raw_line[len(old_line):])
        instance_count += instance_count_in_line
        ### check if in ctor initializer list (2)
        # The list is considered finished on the first line containing " {".
        if besteffort_in_ctor_init_list and old_line.find(" {") >= 0:
            besteffort_in_ctor_init_list = False
    if rewrite and instance_count:
        with open(filepath, 'w', newline='') as f:
            f.write(''.join(
                line + tail for line, tail in zip(new_lines, line_tails)))
    if not rewrite and not echo and not handling_dir:
        sys.stderr.write("--- begin : %s ---\n" % filepath)
        sys.stdout.write('\n'.join(new_lines) + "\n\n")
        sys.stderr.write("--- end : %s ---\n" % filepath)
    return instance_count
+
def is_c_cxx(filename):
    """Return True when `filename` ends with ".h", ".cc", ".cpp" or ".c"."""
    return filename.endswith((".h", ".cc", ".cpp", ".c"))
+
def work(args):
    """Run the action selected on the command line.

    With `args.test` set, convert that single line and print it. Otherwise
    process `args.path`: either one file, or every C/C++ file (see is_c_cxx)
    below a directory, skipping directories whose path contains
    "<sep>test-inputs", "<sep>third-party" or "<sep>linters".

    Args:
        args: parsed arguments with the attributes `test`, `path`, `echo` and
            `rewrite`.

    Returns:
        0 on success, 1 when `args.path` is neither a file nor a directory.
    """
    if args.test is not None:
        new_line, _ = process_one_line(args.test, testing=True)
        print("\x1b[32;m" + new_line + "\x1b[0m")
        return 0
    files_to_read, handling_dir = [], False
    if os.path.isfile(args.path):
        files_to_read = [args.path]
    elif os.path.isdir(args.path):
        handling_dir = True
        excluded_dirs = tuple(
            os.sep + name for name in ("test-inputs", "third-party", "linters"))
        # NOTE(review): followlinks=True can recurse endlessly when a symlink
        # points to one of its own parent directories.
        for (dirpath, dirnames, filenames) in os.walk(args.path, followlinks=True):
            # The exclusion depends on the directory only: decide once per
            # directory instead of once per file.
            if any(excluded in dirpath for excluded in excluded_dirs):
                continue
            files_to_read.extend(
                os.path.join(dirpath, filename)
                for filename in filenames if is_c_cxx(filename))
    else:
        # Explicit error instead of `assert`, which disappears under -O.
        sys.stderr.write("[Error] not a file or directory: %s\n" % args.path)
        return 1
    processed_instances_sum = 0
    for filepath in files_to_read:
        sys.stderr.write("%s .." % (filepath))
        sys.stderr.flush()
        processed_instances = process_one_file(
            filepath, handling_dir=handling_dir, echo=args.echo, rewrite=args.rewrite)
        processed_instances_sum += processed_instances
        # "\r" returns to the start of the line so the count overwrites the
        # progress text written above.
        sys.stderr.write("\r%s count: %d\n" % (filepath, processed_instances))
        sys.stderr.flush()
    if not args.rewrite:
        if handling_dir:
            sys.stderr.write("file count: %d, instance count: %d\n" % (
                len(files_to_read), processed_instances_sum))
        sys.stderr.write("\nTo rewrite files, use '--rewrite'; to echo lines, use '--echo'\n")
    return 0
+
def main():
    """Parse and validate the command line, then hand over to work().

    Returns:
        1 when the combination of arguments is invalid (every problem found is
        reported on stderr), otherwise whatever work() returns.
    """
    parser = argparse.ArgumentParser(description="C/C++ variable name camelCase => snake_case")
    parser.add_argument("path", nargs='?', default=None,
                        help="file or directory")
    parser.add_argument("--rewrite", action="store_true",
                        help="rewrite visited C/C++ files [[caution advised]]")
    parser.add_argument("-e", "--echo", action='store_true',
                        help="echo each line, before and after")
    parser.add_argument("-t", "--test", metavar="\"..\"", type=str, default=None,
                        help="(dev) test one line in \"..\"")
    args = parser.parse_args()
    has_error = False
    if args.test is None and args.path is None:
        has_error = True
        sys.stderr.write("[Error] you need to give argument 'path' or use option '--test'\n")
    if args.test is not None and (args.path is not None or args.rewrite or args.echo):
        has_error = True
        sys.stderr.write("[Error] '--test \"..\"' can only be used alone, but you gave something else too\n")
    if args.test is None and (args.rewrite or args.echo) and args.path is None:
        has_error = True
        sys.stderr.write("[Error] you need to give the path argument while using '--rewrite' or '--echo'\n")
    if args.path is not None and not os.path.exists(args.path):
        has_error = True
        sys.stderr.write("[Error] not found: %s\n" % args.path)
    return 1 if has_error else work(args)


if __name__ == "__main__":
    sys.exit(main())
diff --git a/run_format.sh b/run_format.sh
index 9385aed6..7e7189a0 100755
--- a/run_format.sh
+++ b/run_format.sh
@@ -14,3 +14,7 @@ echo -e "Files found to format = \n\"\"\"\n$FILE_LIST\n\"\"\""
# mistakenly see the entire blob of newline-separated file names as a SINGLE file name instead
# of as a new-line separated list of *many* file names!
clang-format --verbose -i --style=file $FILE_LIST
+
# Run the camelCase => snake_case converter on every file that was formatted.
# NOTE(review): assumes this script runs under a Bourne-style shell (the
# shebang is not visible here); the previous csh-style `foreach ... end` is
# not valid there. $FILE_LIST is left unquoted on purpose so the shell splits
# it into individual file names; "$i" passes the file name, not the letter i.
for i in $FILE_LIST; do
    ./camel_2_snake.py "$i"
done