-rw-r--r--  gleam.toml               1
-rw-r--r--  manifest.toml           11
-rw-r--r--  src/bahnhofname.gleam  209
3 files changed, 136 insertions, 85 deletions
diff --git a/gleam.toml b/gleam.toml
index 63ee397..22c7545 100644
--- a/gleam.toml
+++ b/gleam.toml
@@ -14,6 +14,7 @@ gleam_stdlib = "~> 0.19"
gleam_http = "~> 3.0"
mist = "~> 0.4"
gleam_hackney = "~> 1.0"
+haystack = "~> 0.1"
[dev-dependencies]
gleeunit = "~> 0.6"
diff --git a/manifest.toml b/manifest.toml
index 9627978..e5b8c42 100644
--- a/manifest.toml
+++ b/manifest.toml
@@ -3,20 +3,24 @@
packages = [
{ name = "certifi", version = "2.9.0", build_tools = ["rebar3"], requirements = [], otp_app = "certifi", source = "hex", outer_checksum = "266DA46BDB06D6C6D35FDE799BCB28D36D985D424AD7C08B5BB48F5B5CDD4641" },
+ { name = "decimal", version = "2.1.1", build_tools = ["mix"], requirements = [], otp_app = "decimal", source = "hex", outer_checksum = "53CFE5F497ED0E7771AE1A475575603D77425099BA5FAEF9394932B35020FFCC" },
{ name = "gleam_erlang", version = "0.18.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "C69F59D086AD50B80DE294FB0963550630971C9DC04E92B1F7AEEDD2C0BE226C" },
{ name = "gleam_hackney", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_http", "gleam_stdlib", "hackney"], otp_app = "gleam_hackney", source = "hex", outer_checksum = "B3C1E6BD138D57252F9F9E499C741E9227EE7EE9B017CA650EC8193E02F734E1" },
{ name = "gleam_http", version = "3.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_http", source = "hex", outer_checksum = "D034F5CE0639CD142CBA210B7D5D14236C284B0C5772A043D2E22128594573AE" },
{ name = "gleam_otp", version = "0.5.3", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "6E705B69464237353E0380AC8143BDB29A3F0BF6168755D5F2D6E55A34A8B077" },
{ name = "gleam_stdlib", version = "0.28.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "73F0A89FADE5022CBEF6D6C3551F9ADCE7054AFCE0CB1DC4C6D5AB4CA62D0111" },
{ name = "gleeunit", version = "0.10.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "ECEA2DE4BE6528D36AFE74F42A21CDF99966EC36D7F25DEB34D47DD0F7977BAF" },
- { name = "glisten", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp"], otp_app = "glisten", source = "hex", outer_checksum = "52B530FF25370590843998D1B6C4EC6169DB1300D5E4407A5CDA1575374B7AEC" },
- { name = "hackney", version = "1.18.1", build_tools = ["rebar3"], requirements = ["certifi", "metrics", "mimerl", "ssl_verify_fun", "idna", "parse_trans", "unicode_util_compat"], otp_app = "hackney", source = "hex", outer_checksum = "A4ECDAFF44297E9B5894AE499E9A070EA1888C84AFDD1FD9B7B2BC384950128E" },
+ { name = "glisten", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_otp", "gleam_stdlib"], otp_app = "glisten", source = "hex", outer_checksum = "52B530FF25370590843998D1B6C4EC6169DB1300D5E4407A5CDA1575374B7AEC" },
+ { name = "hackney", version = "1.18.1", build_tools = ["rebar3"], requirements = ["idna", "metrics", "parse_trans", "ssl_verify_fun", "mimerl", "certifi", "unicode_util_compat"], otp_app = "hackney", source = "hex", outer_checksum = "A4ECDAFF44297E9B5894AE499E9A070EA1888C84AFDD1FD9B7B2BC384950128E" },
+ { name = "haystack", version = "0.1.0", build_tools = ["mix"], requirements = ["jason", "stemmer"], otp_app = "haystack", source = "hex", outer_checksum = "27A582513EF933C1B11345B96F8D41EE137D03B25312BD85068FFE8FEC503635" },
{ name = "idna", version = "6.1.1", build_tools = ["rebar3"], requirements = ["unicode_util_compat"], otp_app = "idna", source = "hex", outer_checksum = "92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA" },
+ { name = "jason", version = "1.4.0", build_tools = ["mix"], requirements = ["decimal"], otp_app = "jason", source = "hex", outer_checksum = "79A3791085B2A0F743CA04CEC0F7BE26443738779D09302E01318F97BDB82121" },
{ name = "metrics", version = "1.0.1", build_tools = ["rebar3"], requirements = [], otp_app = "metrics", source = "hex", outer_checksum = "69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16" },
{ name = "mimerl", version = "1.2.0", build_tools = ["rebar3"], requirements = [], otp_app = "mimerl", source = "hex", outer_checksum = "F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323" },
- { name = "mist", version = "0.10.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp", "glisten", "gleam_http"], otp_app = "mist", source = "hex", outer_checksum = "5AFBABABF738BAB8720F047471051E4E9D102CA4694C120DB899FA12AD5D180B" },
+ { name = "mist", version = "0.10.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_otp", "glisten", "gleam_http", "gleam_erlang"], otp_app = "mist", source = "hex", outer_checksum = "5AFBABABF738BAB8720F047471051E4E9D102CA4694C120DB899FA12AD5D180B" },
{ name = "parse_trans", version = "3.3.1", build_tools = ["rebar3"], requirements = [], otp_app = "parse_trans", source = "hex", outer_checksum = "07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B" },
{ name = "ssl_verify_fun", version = "1.1.6", build_tools = ["mix", "rebar3", "make"], requirements = [], otp_app = "ssl_verify_fun", source = "hex", outer_checksum = "BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680" },
+ { name = "stemmer", version = "1.1.0", build_tools = ["mix"], requirements = [], otp_app = "stemmer", source = "hex", outer_checksum = "0CB5FAF73476B84500E371FF39FD9A494F60AB31D991689C1CD53B920556228F" },
{ name = "unicode_util_compat", version = "0.7.0", build_tools = ["rebar3"], requirements = [], otp_app = "unicode_util_compat", source = "hex", outer_checksum = "25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521" },
]
@@ -25,4 +29,5 @@ gleam_hackney = "~> 1.0"
gleam_http = "~> 3.0"
gleam_stdlib = "~> 0.19"
gleeunit = "~> 0.6"
+haystack = "~> 0.1"
mist = "~> 0.4"
diff --git a/src/bahnhofname.gleam b/src/bahnhofname.gleam
index 36ea035..a783955 100644
--- a/src/bahnhofname.gleam
+++ b/src/bahnhofname.gleam
@@ -3,70 +3,62 @@ import gleam/http/request.{Request}
import gleam/http.{Get}
import gleam/bit_builder.{BitBuilder}
import gleam/erlang/process
+import gleam/erlang/atom
import gleam/io
import gleam/int
import gleam/string
import gleam/bit_string
import gleam/list
import gleam/map.{Map}
-import gleam/result.{lazy_unwrap}
import gleam/uri
import gleam/hackney
-import gleam/option.{None, Some}
import mist
-fn do_distlist(
- b: String,
- distlist: List(Int),
- grapheme: String,
- new_distlist: List(Int),
- last_dist: Int,
-) {
- case #(b, distlist) {
- #("", _) -> list.reverse(new_distlist)
- #(_, [distlist_hd, distlist_snd, ..distlist_tl]) -> {
- let assert Ok(b_hd) = string.first(b)
- let b_tl = string.drop_left(b, up_to: 1)
- let diff = case #(b_hd, grapheme) {
- #(a, b) if a != b -> 1
- _ -> 0
- }
- let minimum =
- int.min(int.min(last_dist + 1, distlist_snd + 1), distlist_hd + diff)
- do_distlist(
- b_tl,
- [distlist_snd, ..distlist_tl],
- grapheme,
- [minimum, ..new_distlist],
- minimum,
- )
- }
- }
-}
+external type Index
+external type Field
-fn do_distance(a: String, b: String, distlist: List(Int), step: Int) {
- case a {
- "" -> result.unwrap(list.last(distlist), -1)
- _ -> {
- let assert Ok(src_hd) = string.first(a)
- let src_tl = string.drop_left(a, up_to: 1)
- let distlist = do_distlist(b, distlist, src_hd, [step], step)
- do_distance(src_tl, b, distlist, step + 1)
- }
- }
-}
+external fn index_new(atom.Atom) -> Index =
+ "Elixir.Haystack.Index" "new"
-fn levenshtein(a: String, b: String) -> Int {
- case #(a, b) {
- #(a, b) if a == b -> 0
- #("", b) -> string.length(b)
- #(a, "") -> string.length(a)
- #(a, b) -> {
- let distlist = list.range(0, string.length(b))
- do_distance(a, b, distlist, 1)
- }
- }
-}
+external fn index_ref(Index, Field) -> Index =
+ "Elixir.Haystack.Index" "ref"
+
+external fn index_field(Index, Field) -> Index =
+ "Elixir.Haystack.Index" "field"
+
+external fn field_term(String) -> Field =
+ "Elixir.Haystack.Index.Field" "term"
+
+external fn field_new(String) -> Field =
+ "Elixir.Haystack.Index.Field" "new"
+
+external fn index_add(Index, List(a)) -> Index =
+ "Elixir.Haystack.Index" "add"
+
+external fn index_search(Index, String) -> List(Map(atom.Atom, String)) =
+ "Elixir.Haystack.Index" "search"
+
+pub external fn inspect(a) -> a =
+ "Elixir.IO" "inspect"
+
+external type Query
+external type Clause
+external type Expression
+external fn query_new() -> Query =
+ "Elixir.Haystack.Query" "new"
+external fn query_clause(Query, Clause) -> Query =
+ "Elixir.Haystack.Query" "clause"
+external fn query_run(Query, Index) -> List(Map(atom.Atom, String)) =
+ "Elixir.Haystack.Query" "run"
+external fn clause_new(atom.Atom) -> Clause =
+ "Elixir.Haystack.Query.Clause" "new"
+external fn query_expressions(Clause, List(Expression)) -> Clause =
+ "Elixir.Haystack.Query.Clause" "expressions"
+external fn query_expression_new(atom.Atom, List(#(atom.Atom, String))) -> Expression =
+ "Elixir.Haystack.Query.Expression" "new"
+
+external fn tokenize(String) -> List(Map(atom.Atom, String)) =
+ "Elixir.Haystack.Tokenizer" "tokenize"
fn unpercent(encoded: String) -> String {
let #([head], chunks) =
@@ -94,34 +86,38 @@ fn unpercent(encoded: String) -> String {
res
}
-fn guess_station(query: String, stations: Map(String, String)) -> String {
- query
- stations
- |> map.keys
- |> list.map(fn(a) { #(levenshtein(query, a), a) })
- |> list.fold(
- from: #(string.length(query), query),
- with: fn(a, b) {
- case a.0 < b.0 {
- True -> a
- False -> b
- }
- },
- )
- |> fn(a: #(Int, String)) { a.1 }
-}
-
fn the_lookup(
query: String,
stations: Map(String, String),
ds100s: Map(String, String),
-) -> String {
- map.get(ds100s, query)
- |> lazy_unwrap(fn() {
- io.println(query)
- map.get(stations, query)
- |> lazy_unwrap(fn() { guess_station(query, stations) })
- })
+ fuzzy: fn(String) -> List(String)
+) -> #(Int, String) {
+ case map.get(ds100s, query) {
+ Ok(name) -> #(200, name)
+ _ -> {
+ io.println(query)
+ case map.get(stations, query) {
+ Ok(ds100) -> #(200, ds100)
+ _ -> {
+ let results = fuzzy(query)
+ |> list.filter_map(fn (res) { map.get(ds100s, string.uppercase(res)) })
+ case results {
+ // results -> {
+ // let names = results
+ // |> list.map (fn (res) {
+ // map.get(ds100s, string.uppercase(res))
+ // |> result.map(fn(a) { "/" <> a })
+ // |> result.unwrap("/")})
+ // #(200, string.join(names, "\n"))
+ // }
+ [res] -> #(302, res)
+ [res, ..] -> #(302, res)
+ _ -> #(404, "??")
+ }
+ }
+ }
+ }
+ }
}
fn lookup_station(
@@ -129,6 +125,7 @@ fn lookup_station(
stations: Map(String, String),
ds100s: Map(String, String),
baseurl: String,
+ fuzzy: fn (String) -> List(String)
) -> Response(BitBuilder) {
let #(code, text) = case request {
Request(method: Get, path: "/help", ..)
@@ -136,10 +133,8 @@ fn lookup_station(
200,
"ds100 → Name: " <> baseurl <> "/NN\n" <> "Name → ds100: " <> baseurl <> "/Nürnberg Hbf",
)
- Request(method: Get, path: "/" <> path, ..) -> #(
- 200,
- the_lookup(unpercent(path), stations, ds100s),
- )
+ Request(method: Get, path: "/" <> path, ..) ->
+ the_lookup(unpercent(path), stations, ds100s, fuzzy)
_ -> #(404, "intended usage is e.g. curl " <> baseurl <> "/FF")
}
let body = bit_builder.from_string(text)
@@ -154,6 +149,10 @@ fn lookup_station(
"https://stuebinm.eu/git/bahnhof.name",
)
|> response.prepend_header("content-type", "text/plain; charset=utf8")
+ |> fn (a) { case code == 302 {
+ True -> response.prepend_header(a, "location", text)
+ _ -> a
+ } }
|> response.set_body(body)
}
@@ -193,10 +192,56 @@ pub fn main() {
stations
|> list.map(fn(a) { #(a.1, a.0) })
|> map.from_list
+ let ref = atom.create_from_string("ref")
+ let index = index_new(atom.create_from_string("stations"))
+ |> index_ref(field_term("id"))
+ |> index_field(field_new("name"))
+ |> index_add(stations
+ |> list.map(fn(tuple) {case tuple {
+ #(name, ds100)
+ -> map.from_list([#("id", ds100), #("name", name)]
+ )}}))
+
+
+ let fuzzy = fn(searchterm: String) -> List(String) {
+ let query = query_new()
+ let match = atom.create_from_string("match")
+ let field = atom.create_from_string("field")
+ let term = atom.create_from_string("term")
+ let expressions = tokenize(inspect(searchterm))
+ |> list.filter_map(fn (a) { map.get(a, atom.create_from_string("v")) })
+ |> list.map(fn (token) { query_expression_new(match, [#(field, "name"), #(term, token)]) })
+ let clause = query_expressions(clause_new(atom.create_from_string("all")), expressions)
+ let query = query_clause(query, clause)
+
+ let matches = query_run(query, index)
+ |> list.filter_map(fn (a) { map.get(a, ref) })
+
+ inspect(matches)
+ case list.length(matches) > 5 {
+ True -> {
+ let query = query_new()
+ let clause = query_expressions(
+ clause_new(atom.create_from_string("all")),
+ [query_expression_new(match, [#(field, "name"), #(term, "hbf")]) , ..expressions]
+ )
+ let query = query_clause(query, clause)
+ let narrow = query_run(query, index)
+ |> list.filter_map(fn (a) { map.get(a, ref) })
+ case narrow {
+ [] -> matches
+ _ -> narrow
+ }
+ }
+ _ -> matches
+ }
+ }
+
+ io.println("compiled index, starting server …")
- mist.run_service(
+ let _ = mist.run_service(
2345,
- fn(req) { lookup_station(req, stationmap, ds100map, baseurl) },
+ fn(req) { lookup_station(req, stationmap, ds100map, baseurl, fuzzy) },
max_body_limit: 100,
)
process.sleep_forever()
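
For reference, a minimal sketch of how the Haystack bindings declared in this commit compose outside of main(): it builds a two-station index and runs the (declared but otherwise unused) index_search over it. The station pair, the index name "demo", and the assumption that index_search returns maps keyed by the atom "ref" (as query_run does in main above) are illustrative guesses, not part of this commit.

fn search_demo() -> List(String) {
  // build an index keyed on the ds100 code, searchable by station name
  let ref = atom.create_from_string("ref")
  index_new(atom.create_from_string("demo"))
  |> index_ref(field_term("id"))
  |> index_field(field_new("name"))
  |> index_add([
    map.from_list([#("id", "NN"), #("name", "Nürnberg Hbf")]),
    map.from_list([#("id", "FF"), #("name", "Frankfurt (Main) Hbf")]),
  ])
  // full-text search; each hit is a Map(atom.Atom, String) holding the ref
  |> index_search("nürnberg")
  |> list.filter_map(fn(hit) { map.get(hit, ref) })
  // expected to yield ["NN"], assuming Haystack's default tokenizer
}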