From d36c5d1d0dc98a9e5fd1cfadd1981a5b99ff7d8d Mon Sep 17 00:00:00 2001 From: Son Ho Date: Sat, 29 Jan 2022 21:23:05 +0100 Subject: Start working on StringUtils --- src/ExtractToFStar.ml | 17 +++++++++ src/StringUtils.ml | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 src/StringUtils.ml diff --git a/src/ExtractToFStar.ml b/src/ExtractToFStar.ml index d0726ac7..bb4054c2 100644 --- a/src/ExtractToFStar.ml +++ b/src/ExtractToFStar.ml @@ -4,8 +4,25 @@ open Errors open Pure open TranslateCore open PureToExtract +open StringUtils module F = Format +(* +let mk_name_formatter = +{ + bool_name : string; + char_name : string; + int_name : integer_type -> string; + str_name : string; + field_name : name -> string option -> string; + variant_name : name -> string -> string; + type_name : name -> string; + fun_name : A.fun_id -> name -> int -> region_group_info option -> string; + var_basename : StringSet.t -> ty -> string; + type_var_basename : StringSet.t -> string; + append_index : string -> int -> string; +}*) + (** [inside] constrols whether we should add parentheses or not around type application (if `true` we add parentheses). *) diff --git a/src/StringUtils.ml b/src/StringUtils.ml new file mode 100644 index 00000000..6089efc8 --- /dev/null +++ b/src/StringUtils.ml @@ -0,0 +1,96 @@ +(** Utilities to work on strings, character per character. + + They operate on ASCII strings, and are used by the project to convert + Rust names: Rust names are not fancy, so it shouldn't be a problem. + + Rk.: the poor support of OCaml for char manipulation is really annoying... + *) + +let code_0 = 48 + +let code_9 = 57 + +let code_A = 65 + +let code_Z = 90 + +let code_a = 97 + +let code_z = 122 + +let is_lowercase_ascii (c : char) : bool = + let c = Char.code c in + code_a <= c && c <= code_z + +let is_uppercase_ascii (c : char) : bool = + let c = Char.code c in + code_A <= c && c <= code_Z + +let is_letter_ascii (c : char) : bool = + is_lowercase_ascii c || is_uppercase_ascii c + +let is_digit_ascii (c : char) : bool = + let c = Char.code c in + code_0 <= c && c <= code_9 + +let lowercase_ascii = Char.lowercase_ascii + +let uppercase_ascii = Char.uppercase_ascii + +(** Using buffers as per: + [https://stackoverflow.com/questions/29957418/how-to-convert-char-list-to-string-in-ocaml] + *) +let string_of_chars (chars : char list) : string = + let buf = Buffer.create (List.length chars) in + List.iter (Buffer.add_char buf) chars; + Buffer.contents buf + +let string_to_chars (s : string) : char list = + let length = String.length s in + let rec apply i = + if i = length then [] else String.get s i :: apply (i + 1) + in + apply 0 + +(** This operates on ASCII *) +let to_camel_case (s : string) : string = + (* There are no [fold_left_map] in [String]... *) + let mk_upper = ref true in + (* TODO: remove '_' *) + let apply (c : char) : char = + if !mk_upper then ( + mk_upper := c = '_'; + uppercase_ascii c) + else ( + mk_upper := c = '_'; + c) + in + String.map apply s + +(** This operates on ASCII *) +let to_snake_case (s : string) : string = + (* Note that we rebuild the string in invert order *) + let apply ((prev_is_low, prev_is_digit, acc) : bool * bool * char list) + (c : char) : bool * bool * char list = + let acc = + if prev_is_digit then if is_letter_ascii c then '_' :: acc else acc + else if prev_is_low then + if is_lowercase_ascii c || is_digit_ascii c then acc else '_' :: acc + else acc + in + let prev_is_low = is_lowercase_ascii c in + let prev_is_digit = is_digit_ascii c in + let c = lowercase_ascii c in + (prev_is_low, prev_is_digit, c :: acc) + in + let _, _, chars = + List.fold_left apply (false, false, []) (string_to_chars s) + in + string_of_chars (List.rev chars) + +(** Unit tests *) +let _ = + assert (is_digit_ascii '3'); + assert (is_digit_ascii '6'); + assert (to_snake_case "HelloWorld36Hello" = "hello_world36_hello"); + assert (to_snake_case "HELLO" = "hello") -- cgit v1.2.3