import gleam/http/response.{Response}
import gleam/http/request.{Request, get_header}
import gleam/http.{Get}
import gleam/bit_builder.{BitBuilder}
import gleam/erlang/process
import gleam/erlang/atom
import gleam/erlang/file
import gleam/io
import gleam/int
import gleam/string
import gleam/bit_string
import gleam/list
import gleam/map.{Map}
import gleam/uri
import gleam/hackney
import gleam/pair.{swap}
import gleam/result
import mist

// Host names this service answers on; the value of the `x-forwarded-host`
// header decides which kind of identifier a request is about.
const ds100_domain = "ds100.bahnhof.name"

const ril100_domain = "ril100.bahnhof.name"

const leitpunkt_domain = "leitpunkt.bahnhof.name"

const domain = "bahnhof.name"

const proto = "https://"

// --- Bindings to the Elixir `Haystack` modules (search index) --------------

external type Index

external type Field

external fn index_new(atom.Atom) -> Index =
  "Elixir.Haystack.Index" "new"

external fn index_ref(Index, Field) -> Index =
  "Elixir.Haystack.Index" "ref"

external fn index_field(Index, Field) -> Index =
  "Elixir.Haystack.Index" "field"

external fn field_term(String) -> Field =
  "Elixir.Haystack.Index.Field" "term"

external fn field_new(String) -> Field =
  "Elixir.Haystack.Index.Field" "new"

external fn index_add(Index, List(a)) -> Index =
  "Elixir.Haystack.Index" "add"

/// Binding to Elixir's `IO.inspect`; used in this module as a debug print
/// whose argument is passed through as the return value.
pub external fn inspect(a) -> a =
  "Elixir.IO" "inspect"

external type Query

external type Clause

external type Expression

external fn query_new() -> Query =
  "Elixir.Haystack.Query" "new"

external fn query_clause(Query, Clause) -> Query =
  "Elixir.Haystack.Query" "clause"

external fn query_run(Query, Index) -> List(Map(atom.Atom, String)) =
  "Elixir.Haystack.Query" "run"

external fn clause_new(atom.Atom) -> Clause =
  "Elixir.Haystack.Query.Clause" "new"

external fn query_expressions(Clause, List(Expression)) -> Clause =
  "Elixir.Haystack.Query.Clause" "expressions"

external fn query_expression_new(
  atom.Atom,
  List(#(atom.Atom, String)),
) -> Expression =
  "Elixir.Haystack.Query.Expression" "new"

external fn tokenize(String) -> List(Map(atom.Atom, String)) =
  "Elixir.Haystack.Tokenizer" "tokenize"

/// Which identifier scheme a lookup refers to.
type IdKind {
  DS100
  Leitpunkt
}

/// Outcome of a name lookup: an exact hit, a fuzzy hit, or nothing.
type Matched(t) {
  Exact(t)
  Fuzzy(t)
  Failed
}

/// Decodes one piece of a string that was split on "%".
///
/// If the chunk starts with two hex digits they are turned into a single
/// byte and the rest of the chunk is appended unchanged. Chunks that are too
/// short or do not start with a valid hex pair are passed through as-is
/// instead of crashing the request handler.
fn decode_escape(chunk: String) -> BitString {
  case string.length(chunk) < 2 {
    True -> bit_string.from_string(chunk)
    False -> {
      let hex = string.slice(chunk, at_index: 0, length: 2)
      case int.base_parse(hex, 16) {
        // the guard rejects signed input such as "-f", which the integer
        // parser presumably accepts but which is not a valid escape
        Ok(byte) if byte >= 0 ->
          bit_string.append(
            <<byte:8>>,
            bit_string.from_string(string.drop_left(chunk, 2)),
          )
        _ -> bit_string.from_string(chunk)
      }
    }
  }
}

/// Percent-decodes a request path and replaces every "_" with a space.
///
/// Malformed escapes are kept literally (see `decode_escape`). If the decoded
/// bytes are not valid UTF-8 the still-encoded input is used instead, so
/// untrusted paths can no longer crash the handler.
fn unpercent(encoded: String) -> String {
  case string.split(encoded, on: "%") {
    // presumably unreachable: splitting is expected to yield at least one
    // element, but handling it keeps the match exhaustive
    [] -> string.replace(encoded, "_", " ")
    [head, ..chunks] ->
      chunks
      |> list.map(decode_escape)
      |> list.prepend(bit_string.from_string(head))
      |> bit_string.concat
      |> bit_string.to_string
      |> result.unwrap(encoded)
      |> string.replace("_", " ")
  }
}

/// Looks up a query in a Map by exact value, no fuzzy matching.
///
/// Returns `#(200, value)` on a hit and `#(404, "??")` otherwise.
fn lookup_exact(query: String, lookup: Map(String, String)) -> #(Int, String) {
  case map.get(lookup, query) {
    Ok(result) -> #(200, result)
    _ -> #(404, "??")
  }
}

/// Looks up a station by its name, with fuzzy matching.
///
/// Maps the outcome of `fuzzy` to a status code: 200 for an exact match,
/// 302 for a fuzzy match and 404 (with body "??") when nothing was found.
fn lookup_fuzzy(
  query: String,
  kind: IdKind,
  fuzzy: fn(String, IdKind) -> Matched(String),
) -> #(Int, String) {
  case fuzzy(query, kind) {
    Exact(res) -> #(200, res)
    Fuzzy(res) -> #(302, res)
    Failed -> #(404, "??")
  }
}

/// Keeps `res` if its status is 200, otherwise evaluates `fallback`.
/// The chosen result is also passed through `inspect` (debug output).
fn if_not(res: #(Int, t), fallback: fn() -> #(Int, t)) -> #(Int, t) {
  inspect(case res {
    #(200, _) -> res
    _ -> fallback()
  })
}

/// Request handler: turns a request into a plain-text response.
///
/// - `/favicon.ico` is answered with an empty 404.
/// - `/` and `/help` return a short usage text.
/// - any other GET path is decoded with `unpercent` and looked up; which
///   table is used depends on the `x-forwarded-host` header. Without a known
///   host the client is redirected (302) to the ril100 or Leitpunkt domain.
/// - everything else gets a 404 with a usage hint.
///
/// For status 302 the response text doubles as the `location` header.
fn lookup_station(
  request: Request(t),
  ds100_to_name: Map(String, String),
  leitpunkt_to_name: Map(String, String),
  fuzzy: fn(String, IdKind) -> Matched(String),
) -> Response(BitBuilder) {
  let #(code, text) = case request {
    // blackhole favicon.ico requests instead of using the index
    Request(method: Get, path: "/favicon.ico", ..) -> #(404, "")
    Request(method: Get, path: "/help", ..) | Request(
      method: Get,
      path: "/",
      ..,
    ) -> #(
      200,
      "ril100 → Name: " <> proto <> ril100_domain <> "/HG\n" <> "Name → ril100: " <> proto <> ril100_domain <> "/Göttingen\n\n" <> "Leitpunkt → Name: " <> proto <> leitpunkt_domain <> "/GOE\n" <> "Name → Leitpunkt: " <> proto <> leitpunkt_domain <> "/Göttingen\n\n" <> "Fuzzy:" <> proto <> domain <> "/...",
    )
    Request(method: Get, path: "/" <> path, ..) -> {
      let query = unpercent(path)
      case get_header(request, "x-forwarded-host") {
        Ok(host) if host == leitpunkt_domain ->
          query
          |> lookup_exact(leitpunkt_to_name)
          |> if_not(fn() { lookup_fuzzy(query, Leitpunkt, fuzzy) })
        Ok(host) if host == ril100_domain || host == ds100_domain ->
          query
          |> lookup_exact(ds100_to_name)
          |> if_not(fn() { lookup_fuzzy(query, DS100, fuzzy) })
        _ -> {
          // unknown host: redirect to whichever domain knows the identifier,
          // defaulting to the ril100 domain
          let by_ds100 = lookup_exact(query, ds100_to_name)
          let by_lp = lookup_exact(query, leitpunkt_to_name)
          case #(by_ds100.0, by_lp.0) {
            #(200, _) -> #(302, proto <> ril100_domain <> "/" <> path)
            #(_, 200) -> #(302, proto <> leitpunkt_domain <> "/" <> path)
            _ -> #(302, proto <> ril100_domain <> "/" <> path)
          }
        }
      }
    }
    _ -> #(404, "intended usage is e.g. curl " <> proto <> domain <> "/FF")
  }

  let body = bit_builder.from_string(text)
  let base =
    response.new(code)
    |> response.prepend_header(
      "x-data-source",
      "https://data.deutschebahn.com/dataset/data-betriebsstellen.html",
    )
    |> response.prepend_header(
      "x-sources-at",
      "https://stuebinm.eu/git/bahnhof.name",
    )
    |> response.prepend_header("content-type", "text/plain; charset=utf8")

  // redirects carry their target in the response text
  let with_location = case code == 302 {
    True -> response.prepend_header(base, "location", text)
    _ -> base
  }
  response.set_body(with_location, body)
}

/// Entry point: loads both data sets, builds the lookup maps and search
/// indices, then serves `lookup_station` on port 2345.
pub fn main() {
  let assert Ok(bahn_ril100) = fetch_data()
  // rows are used as #(name, ds100), taken from the third and second column
  let ds100s =
    read_csv(bahn_ril100)
    |> list.filter_map(fn(fields) {
      case fields {
        [_, ds100, name, ..] -> Ok(#(name, ds100))
        _ -> Error(fields)
      }
    })

  let assert Ok(leitpunkte_raw) = file.read("data/leitpunkte.csv")
  // only rows with exactly three columns are kept, as #(name, leitpunkt)
  let leitpunkte =
    read_csv(leitpunkte_raw)
    |> list.filter_map(fn(fields) {
      case fields {
        [lp, name, _ds100] -> Ok(#(name, lp))
        _ -> Error(fields)
      }
    })

  let name_to_ds100 = map.from_list(ds100s)
  let name_to_leitpunkt = map.from_list(leitpunkte)
  let ds100_to_name = map.from_list(list.map(ds100s, swap))
  let leitpunkt_to_name = map.from_list(list.map(leitpunkte, swap))

  let ds100index =
    index_new(atom.create_from_string("ds100"))
    |> index_ref(field_term("id"))
    |> index_field(field_new("name"))
    |> index_add(
      ds100s
      |> list.map(fn(tuple) {
        let #(name, ds100) = tuple
        map.from_list([#("id", ds100), #("name", name)])
      }),
    )

  let leitpunkt_index =
    index_new(atom.create_from_string("leitpunkt"))
    |> index_ref(field_term("id"))
    |> index_field(field_new("name"))
    |> index_add(
      leitpunkte
      |> list.map(fn(tuple) {
        let #(name, leitpunkt) = tuple
        map.from_list([#("id", leitpunkt), #("name", name)])
      }),
    )

  let ref = atom.create_from_string("ref")

  // Runs a search for `searchterm` against the index selected by `kind` and
  // returns the `ref` values of the matches.
  let fuzzy = fn(searchterm: String, kind: IdKind) -> List(String) {
    let query = query_new()
    let index = case kind {
      DS100 -> ds100index
      Leitpunkt -> leitpunkt_index
    }
    let match = atom.create_from_string("match")
    let field = atom.create_from_string("field")
    let term = atom.create_from_string("term")
    // one "match on name" expression per token of the search term
    let expressions =
      tokenize(inspect(searchterm))
      |> list.filter_map(fn(a) { map.get(a, atom.create_from_string("v")) })
      |> list.map(fn(token) {
        query_expression_new(match, [#(field, "name"), #(term, token)])
      })
    let clause =
      query_expressions(clause_new(atom.create_from_string("all")), expressions)
    let query = query_clause(query, clause)
    let matches =
      query_run(query, index)
      |> list.filter_map(fn(a) { map.get(a, ref) })
    inspect(matches)
    case list.length(matches) > 5 {
      True -> {
        // too many candidates: retry with the extra term "hbf" and prefer
        // that narrower result if it is not empty
        let query = query_new()
        let clause =
          query_expressions(
            clause_new(atom.create_from_string("all")),
            [
              query_expression_new(match, [#(field, "name"), #(term, "hbf")]),
              ..expressions
            ],
          )
        let query = query_clause(query, clause)
        let narrow =
          query_run(query, index)
          |> list.filter_map(fn(a) { map.get(a, ref) })
        case narrow {
          [] -> matches
          _ -> narrow
        }
      }
      _ -> matches
    }
  }

  // Tries an exact name -> id lookup first (`Exact(id)`); otherwise takes the
  // first fuzzy search hit that resolves in the id -> name map (`Fuzzy(name)`).
  let exact_then_fuzzy = fn(searchterm: String, kind: IdKind) -> Matched(String) {
    let #(stations, ids) = case kind {
      DS100 -> #(name_to_ds100, ds100_to_name)
      Leitpunkt -> #(name_to_leitpunkt, leitpunkt_to_name)
    }
    case map.get(stations, searchterm) {
      Ok(id) -> Exact(id)
      _ -> {
        let results =
          fuzzy(searchterm, kind)
          |> list.filter_map(fn(res) { map.get(ids, string.uppercase(res)) })
        case results {
          [res, ..] -> Fuzzy(res)
          [] -> Failed
        }
      }
    }
  }

  io.println("compiled indices, starting server …")

  let _ =
    mist.run_service(
      2345,
      fn(req) {
        lookup_station(req, ds100_to_name, leitpunkt_to_name, exact_then_fuzzy)
      },
      max_body_limit: 100,
    )
  process.sleep_forever()
}

/// Downloads the DB Netz Betriebsstellenverzeichnis CSV.
///
/// A failing HTTP request is returned as `Error` rather than crashing here;
/// only the hard-coded URL is asserted to be valid.
fn fetch_data() -> Result(String, hackney.Error) {
  let assert Ok(uri) =
    uri.parse(
      "https://download-data.deutschebahn.com/static/datasets/betriebsstellen/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv",
    )
  let assert Ok(request) = request.from_uri(uri)
  hackney.send(request)
  |> result.map(fn(response) {
    // some ü are corrupted for some reason
    string.replace(response.body, "�", "ü")
  })
}

/// Splits CSV text into rows of ";"-separated fields, skipping the first
/// (header) line.
fn read_csv(contents) -> List(List(String)) {
  contents
  // the file doesn't use quotes, so this is fine
  |> string.split(on: "\n")
  // drop CSV header
  |> list.drop(1)
  |> list.map(fn(a) { string.split(a, on: ";") })
}