summaryrefslogtreecommitdiff
path: root/compiler/StringUtils.ml
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/StringUtils.ml')
-rw-r--r--compiler/StringUtils.ml106
1 files changed, 106 insertions, 0 deletions
diff --git a/compiler/StringUtils.ml b/compiler/StringUtils.ml
new file mode 100644
index 00000000..0fd46136
--- /dev/null
+++ b/compiler/StringUtils.ml
@@ -0,0 +1,106 @@
+(** Utilities to work on strings, character per character.
+
+ They operate on ASCII strings, and are used by the project to convert
+ Rust names: Rust names are not fancy, so it shouldn't be a problem.
+
+ Rk.: the poor support of OCaml for char manipulation is really annoying...
+ *)
+
+let code_0 = 48
+let code_9 = 57
+let code_A = 65
+let code_Z = 90
+let code_a = 97
+let code_z = 122
+
+let is_lowercase_ascii (c : char) : bool =
+ let c = Char.code c in
+ code_a <= c && c <= code_z
+
+let is_uppercase_ascii (c : char) : bool =
+ let c = Char.code c in
+ code_A <= c && c <= code_Z
+
+let is_letter_ascii (c : char) : bool =
+ is_lowercase_ascii c || is_uppercase_ascii c
+
+let is_digit_ascii (c : char) : bool =
+ let c = Char.code c in
+ code_0 <= c && c <= code_9
+
+let lowercase_ascii = Char.lowercase_ascii
+let uppercase_ascii = Char.uppercase_ascii
+
+(** Using buffers as per:
+ {{: https://stackoverflow.com/questions/29957418/how-to-convert-char-list-to-string-in-ocaml} stackoverflow}
+ *)
+let string_of_chars (chars : char list) : string =
+ let buf = Buffer.create (List.length chars) in
+ List.iter (Buffer.add_char buf) chars;
+ Buffer.contents buf
+
+let string_to_chars (s : string) : char list =
+ let length = String.length s in
+ let rec apply i =
+ if i = length then [] else String.get s i :: apply (i + 1)
+ in
+ apply 0
+
+(** This operates on ASCII *)
+let to_camel_case (s : string) : string =
+ (* Note that we rebuild the string in reverse order *)
+ let apply ((prev_is_under, acc) : bool * char list) (c : char) :
+ bool * char list =
+ if c = '_' then (true, acc)
+ else
+ let c = if prev_is_under then uppercase_ascii c else c in
+ (false, c :: acc)
+ in
+ let _, chars = List.fold_left apply (true, []) (string_to_chars s) in
+ string_of_chars (List.rev chars)
+
+(** This operates on ASCII *)
+let to_snake_case (s : string) : string =
+ (* Note that we rebuild the string in reverse order *)
+ let apply ((prev_is_low, prev_is_digit, acc) : bool * bool * char list)
+ (c : char) : bool * bool * char list =
+ let acc =
+ if c = '_' then acc
+ else if prev_is_digit then if is_letter_ascii c then '_' :: acc else acc
+ else if prev_is_low then
+ if (is_lowercase_ascii c || is_digit_ascii c) && c <> '_' then acc
+ else '_' :: acc
+ else acc
+ in
+ let prev_is_low = is_lowercase_ascii c in
+ let prev_is_digit = is_digit_ascii c in
+ let c = lowercase_ascii c in
+ (prev_is_low, prev_is_digit, c :: acc)
+ in
+ let _, _, chars =
+ List.fold_left apply (false, false, []) (string_to_chars s)
+ in
+ string_of_chars (List.rev chars)
+
+(** Applies a map operation.
+
+ This is very inefficient, but shouldn't be used much.
+ *)
+let map (f : char -> string) (s : string) : string =
+ let sl = List.map f (string_to_chars s) in
+ let sl = List.map string_to_chars sl in
+ string_of_chars (List.concat sl)
+
+let capitalize_first_letter (s : string) : string =
+ let s = string_to_chars s in
+ let s = match s with [] -> s | c :: s' -> uppercase_ascii c :: s' in
+ string_of_chars s
+
+(** Unit tests *)
+let _ =
+ assert (to_camel_case "hello_world" = "HelloWorld");
+ assert (to_snake_case "HelloWorld36Hello" = "hello_world36_hello");
+ assert (to_snake_case "HELLO" = "hello");
+ assert (to_snake_case "T1" = "t1");
+ assert (to_camel_case "list" = "List");
+ assert (to_snake_case "is_cons" = "is_cons")