diff options
Diffstat (limited to 'src/bahnhofname.gleam')
-rw-r--r-- | src/bahnhofname.gleam | 430 |
1 files changed, 0 insertions, 430 deletions
// Reconstructed from the deletion diff of src/bahnhofname.gleam
// (blob fe3b10f, 430 lines removed).

import gleam/http/response.{Response}
import gleam/http/request.{Request, get_header}
import gleam/http.{Get}
import gleam/bit_builder
import gleam/erlang/process
import gleam/erlang/atom
import gleam/erlang/file
import gleam/io
import gleam/int
import gleam/string
import gleam/bit_string
import gleam/list
import gleam/map.{Map}
import gleam/uri
import gleam/hackney
import gleam/pair.{swap}
import gleam/result
import mist

// Host names compared against the `x-forwarded-host` request header to pick
// which identifier scheme a request is about.
const ds100_domain = "ds100.bahnhof.name"

const ril100_domain = "ril100.bahnhof.name"

const leitpunkt_domain = "leitpunkt.bahnhof.name"

const domain = "bahnhof.name"

const proto = "https://"

// ---------------------------------------------------------------------------
// FFI bindings to the Elixir `Haystack` modules. The types below have no
// constructors on the Gleam side; values only come from these externals.
// ---------------------------------------------------------------------------

/// Value produced by `Haystack.Index.new` and threaded through the index
/// builder functions.
type Index

/// Value produced by `Haystack.Index.Field.term` / `.new`.
type Field

@external(erlang, "Elixir.Haystack.Index", "new")
fn index_new(a: atom.Atom) -> Index

@external(erlang, "Elixir.Haystack.Index", "ref")
fn index_ref(a: Index, b: Field) -> Index

@external(erlang, "Elixir.Haystack.Index", "field")
fn index_field(a: Index, b: Field) -> Index

@external(erlang, "Elixir.Haystack.Index.Field", "term")
fn field_term(a: String) -> Field

@external(erlang, "Elixir.Haystack.Index.Field", "new")
fn field_new(a: String) -> Field

@external(erlang, "Elixir.Haystack.Index", "add")
fn index_add(a: Index, b: List(a)) -> Index

/// Binding to Elixir's `IO.inspect`; typed as returning its argument, so it
/// can be dropped into pipelines for debug output.
@external(erlang, "Elixir.IO", "inspect")
pub fn inspect(a: a) -> a

/// Value produced by `Haystack.Query.new`.
type Query

/// Value produced by `Haystack.Query.Clause.new`.
type Clause

/// Value produced by `Haystack.Query.Expression.new`.
type Expression

@external(erlang, "Elixir.Haystack.Query", "new")
fn query_new() -> Query

@external(erlang, "Elixir.Haystack.Query", "clause")
fn query_clause(a: Query, b: Clause) -> Query

@external(erlang, "Elixir.Haystack.Query", "run")
fn query_run(a: Query, b: Index) -> List(Map(atom.Atom, String))

@external(erlang, "Elixir.Haystack.Query.Clause", "new")
fn clause_new(a: atom.Atom) -> Clause

@external(erlang, "Elixir.Haystack.Query.Clause", "expressions")
fn query_expressions(a: Clause, b: List(Expression)) -> Clause

@external(erlang, "Elixir.Haystack.Query.Expression", "new")
fn query_expression_new(
  a: atom.Atom,
  b: List(#(atom.Atom, String)),
) -> Expression

@external(erlang, "Elixir.Haystack.Tokenizer", "tokenize")
fn tokenize(a: String) -> List(Map(atom.Atom, String))

/// Which identifier scheme a lookup should use.
type IdKind {
  DS100
  Leitpunkt
}

/// Outcome of a station search.
///
/// In this file `Exact` carries the identifier found for an exactly matching
/// name, and `Fuzzy` carries the matched station name followed by its
/// identifier.
type Matched(t) {
  Exact(t)
  Fuzzy(t, t)
  Failed
}

/// Percent-decodes a request path segment and turns `_` into spaces.
///
/// The path comes straight from the client, so decoding must never crash:
/// when `encoded` contains a malformed escape (e.g. `%zz`) or decodes to bytes
/// that are not valid UTF-8 (e.g. `%ff`), the input is used undecoded instead.
/// Underscores are replaced in either case.
fn unpercent(encoded: String) -> String {
  encoded
  |> uri.percent_decode
  |> result.unwrap(encoded)
  |> string.replace("_", " ")
}

/// Looks up a query in a Map by exact value, no fuzzy matching.
///
/// Returns `#(200, value)` on a hit and `#(404, "??")` otherwise.
fn lookup_exact(query: String, lookup: Map(String, String)) -> #(Int, String) {
  case map.get(lookup, query) {
    Ok(found) -> #(200, found)
    Error(_) -> #(404, "??")
  }
}

/// Looks up a station by its name, with fuzzy matching.
/// Runs `fuzzy(query, kind)` and maps the verdict to `#(status, text)`:
/// `Exact` gives 200 with its payload, `Fuzzy` gives 302 with its first
/// payload (used by `lookup_station` as the redirect target), and `Failed`
/// gives `#(404, "??")`.
fn lookup_fuzzy(
  query: String,
  kind: IdKind,
  fuzzy: fn(String, IdKind) -> Matched(String),
) -> #(Int, String) {
  case fuzzy(query, kind) {
    Exact(res) -> #(200, res)
    Fuzzy(res, _) -> #(302, res)
    Failed -> #(404, "??")
  }
}

/// Keeps `res` when its status is 200, otherwise evaluates `fallback`.
/// The chosen tuple is passed through `inspect` (debug output) and returned.
fn if_not(res: #(Int, t), fallback: fn() -> #(Int, t)) -> #(Int, t) {
  let chosen = case res {
    #(200, _) -> res
    _ -> fallback()
  }
  inspect(chosen)
}

/// Path suffixes that request the platform listing instead of a lookup.
const platform_suffixes = [
  "/gleis", "/bahnsteig", "/platforms", "/tracks", "/platform", "/track",
]

/// Splits a decoded path into `#(show_platforms, query)`.
///
/// Only a *trailing* platform suffix is removed; the same text occurring
/// elsewhere in the query is left alone.
fn strip_platform_suffix(raw_query: String) -> #(Bool, String) {
  let hit =
    list.find(
      platform_suffixes,
      fn(suffix) { string.ends_with(raw_query, suffix) },
    )
  case hit {
    Ok(suffix) -> #(True, string.drop_right(raw_query, string.length(suffix)))
    Error(_) -> #(False, raw_query)
  }
}

/// Plain-text usage summary served for `/` and `/help`.
fn help_text() -> String {
  "ril100 → Name: " <> proto <> ril100_domain <> "/HG\n" <> "Name → ril100: " <> proto <> ril100_domain <> "/Göttingen\n\n" <> "Leitpunkt → Name: " <> proto <> leitpunkt_domain <> "/GOE\n" <> "Name → Leitpunkt: " <> proto <> leitpunkt_domain <> "/Göttingen\n\n" <> "Fuzzy:" <> proto <> domain <> "/..."
}

/// Resolves `name` via `fuzzy(name, DS100)` and renders that station's
/// platforms; yields `#(404, "")` when nothing matches.
fn platforms_by_name(
  name: String,
  lookup_platform: fn(String) -> String,
  fuzzy: fn(String, IdKind) -> Matched(String),
) -> #(Int, String) {
  case fuzzy(name, DS100) {
    Exact(code) -> #(200, lookup_platform(code))
    Fuzzy(_, code) -> #(200, lookup_platform(code))
    Failed -> #(404, "")
  }
}

/// HTTP handler: turns a request into a response.
///
/// - `GET /favicon.ico` is answered with an empty 404 without touching the
///   index.
/// - `GET /` and `GET /help` return the usage text.
/// - `GET /<query>` is dispatched on the `x-forwarded-host` header: the
///   Leitpunkt and ril100/ds100 hosts do an exact lookup with a fuzzy
///   fallback, or list platforms when the path ends in a platform suffix.
///   Any other host is redirected (302) to the ril100 or Leitpunkt host.
/// - Everything else gets a 404 with a usage hint.
///
/// For status 302 the response text doubles as the `location` header.
fn lookup_station(
  request: Request(t),
  ds100_to_name: Map(String, String),
  leitpunkt_to_name: Map(String, String),
  lookup_platform: fn(String) -> String,
  fuzzy: fn(String, IdKind) -> Matched(String),
) -> Response(mist.ResponseData) {
  let #(#(code, text), is_html) = case request {
    // blackhole favicon.ico requests instead of using the index
    Request(method: Get, path: "/favicon.ico", ..) -> #(#(404, ""), False)
    Request(method: Get, path: "/help", ..)
    | Request(method: Get, path: "/", ..) -> #(#(200, help_text()), False)
    Request(method: Get, path: "/" <> path, ..) -> {
      let #(show_platforms, query) = strip_platform_suffix(unpercent(path))
      case #(show_platforms, get_header(request, "x-forwarded-host")) {
        #(False, Ok(host)) if host == leitpunkt_domain -> {
          let answer =
            query
            |> lookup_exact(leitpunkt_to_name)
            |> if_not(fn() { lookup_fuzzy(query, Leitpunkt, fuzzy) })
          #(answer, False)
        }
        #(False, Ok(host)) if host == ril100_domain || host == ds100_domain -> {
          let answer =
            query
            |> lookup_exact(ds100_to_name)
            |> if_not(fn() { lookup_fuzzy(query, DS100, fuzzy) })
          #(answer, False)
        }
        #(True, Ok(host)) if host == leitpunkt_domain -> {
          // A Leitpunkt code is first translated to its station name; the
          // platform table is keyed by the DS100 code resolved from it.
          let name = case map.get(leitpunkt_to_name, query) {
            Ok(name) -> name
            Error(_) -> query
          }
          #(platforms_by_name(name, lookup_platform, fuzzy), True)
        }
        #(True, Ok(host)) if host == ril100_domain || host == ds100_domain -> {
          let answer = case lookup_exact(query, ds100_to_name) {
            // the query already is a known DS100 code
            #(200, _) -> #(200, lookup_platform(query))
            _ -> platforms_by_name(query, lookup_platform, fuzzy)
          }
          #(answer, True)
        }
        _ -> {
          let by_ds100 = lookup_exact(query, ds100_to_name)
          let by_lp = lookup_exact(query, leitpunkt_to_name)
          let target = case #(by_ds100.0, by_lp.0) {
            #(200, _) -> proto <> ril100_domain <> "/" <> path
            #(_, 200) -> proto <> leitpunkt_domain <> "/" <> path
            _ -> proto <> ril100_domain <> "/" <> path
          }
          #(#(302, target), False)
        }
      }
    }
    _ -> #(
      #(404, "intended usage is e.g. curl " <> proto <> domain <> "/FF"),
      False,
    )
  }

  let body =
    text
    |> bit_builder.from_string
    |> mist.Bytes

  let content_type = case is_html {
    True -> "text/html; charset=utf8"
    False -> "text/plain; charset=utf8"
  }

  let base =
    response.new(code)
    |> response.prepend_header(
      "x-data-source",
      "https://data.deutschebahn.com/dataset/data-betriebsstellen.html",
    )
    |> response.prepend_header(
      "x-sources-at",
      "https://stuebinm.eu/git/bahnhof.name",
    )
    |> response.prepend_header("content-type", content_type)

  let with_location = case code {
    302 -> response.prepend_header(base, "location", text)
    _ -> base
  }

  response.set_body(with_location, body)
}

/// Builds a Haystack index named `name` with `id` as the reference field and
/// `name` as the searchable field, filled from `#(station_name, id)` pairs.
fn build_index(name: String, entries: List(#(String, String))) -> Index {
  let documents =
    list.map(
      entries,
      fn(entry) {
        let #(station, id) = entry
        map.from_list([#("id", id), #("name", station)])
      },
    )

  index_new(atom.create_from_string(name))
  |> index_ref(field_term("id"))
  |> index_field(field_new("name"))
  |> index_add(documents)
}

/// Loads the CSV/TSV data files, builds the lookup maps and search indices,
/// then serves `lookup_station` over HTTP on port 2345 forever.
pub fn main() {
  let assert Ok(bahn_ril100) =
    file.read("data/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv")

  let ds100s =
    read_csv(bahn_ril100, ";")
    |> list.filter_map(fn(fields) {
      case fields {
        [_, ds100, name, ..] -> Ok(#(name, ds100))
        _ -> Error(fields)
      }
    })

  let assert Ok(leitpunkte_raw) = file.read("data/leitpunkte.csv")
  let leitpunkte =
    read_csv(leitpunkte_raw, ";")
    |> list.filter_map(fn(fields) {
      case fields {
        [lp, name, _ds100] -> Ok(#(name, lp))
        _ -> Error(fields)
      }
    })

  let assert Ok(platforms_raw) = file.read("data/platforms.tsv")
  let platforms = read_csv(platforms_raw, "\t")

  let name_to_ds100 = map.from_list(ds100s)
  let name_to_leitpunkt = map.from_list(leitpunkte)
  let ds100_to_name = map.from_list(list.map(ds100s, swap))
  let leitpunkt_to_name = map.from_list(list.map(leitpunkte, swap))

  let ds100index = build_index("ds100", ds100s)
  let leitpunkt_index = build_index("leitpunkt", leitpunkte)

  let ref = atom.create_from_string("ref")

  // Full-text search over station names; returns the `ref` values of the hits.
  let fuzzy = fn(searchterm: String, kind: IdKind) -> List(String) {
    let index = case kind {
      DS100 -> ds100index
      Leitpunkt -> leitpunkt_index
    }
    let match = atom.create_from_string("match")
    let field = atom.create_from_string("field")
    let term = atom.create_from_string("term")

    // Runs an "all expressions must match" query and collects the refs.
    let run = fn(expressions: List(Expression)) -> List(String) {
      let clause =
        query_expressions(
          clause_new(atom.create_from_string("all")),
          expressions,
        )
      query_new()
      |> query_clause(clause)
      |> query_run(index)
      |> list.filter_map(fn(hit) { map.get(hit, ref) })
    }

    let expressions =
      tokenize(inspect(searchterm))
      |> list.filter_map(fn(token) {
        map.get(token, atom.create_from_string("v"))
      })
      |> list.map(fn(token) {
        query_expression_new(match, [#(field, "name"), #(term, token)])
      })

    let matches = run(expressions)
    inspect(matches)

    case list.length(matches) > 5 {
      True -> {
        // Too many candidates: retry with an extra "hbf" term and prefer
        // that narrower result when it is non-empty.
        let narrow =
          run([
            query_expression_new(match, [#(field, "name"), #(term, "hbf")]),
            ..expressions
          ])
        case narrow {
          [] -> matches
          _ -> narrow
        }
      }
      False -> matches
    }
  }

  // Exact name lookup first; otherwise take the first fuzzy hit that maps
  // back to a known station.
  let exact_then_fuzzy = fn(searchterm: String, kind: IdKind) -> Matched(String) {
    let #(stations, ids) = case kind {
      DS100 -> #(name_to_ds100, ds100_to_name)
      Leitpunkt -> #(name_to_leitpunkt, leitpunkt_to_name)
    }
    case map.get(stations, searchterm) {
      Ok(id) -> Exact(id)
      Error(_) -> {
        let names =
          fuzzy(searchterm, kind)
          |> list.filter_map(fn(res) { map.get(ids, string.uppercase(res)) })
        case names {
          [name, ..] ->
            case map.get(stations, name) {
              Ok(id) -> Fuzzy(name, id)
              // should not happen (both maps come from the same rows), but a
              // miss must not take the request handler down
              Error(_) -> Failed
            }
          [] -> Failed
        }
      }
    }
  }

  // Renders the platform rows for one DS100 code as HTML links.
  // NOTE(review): field values are inserted into the markup unescaped; this
  // relies on data/platforms.tsv being trusted.
  let lookup_platform = fn(ds100: String) -> String {
    inspect(ds100)
    platforms
    |> list.filter_map(fn(line) {
      case line {
        [code, osmid, osmtype, info] if code == ds100 ->
          Ok(
            "<a href=\"https://osm.org/" <> osmtype <> "/" <> osmid <> "\">" <> info <> "</a>",
          )
        // other stations, and rows without exactly four fields
        _ -> Error(Nil)
      }
    })
    |> string.join("<br>\n")
    |> inspect
  }

  io.println("compiled indices, starting server …")

  let handler = fn(req: Request(mist.Connection)) -> Response(mist.ResponseData) {
    lookup_station(
      req,
      ds100_to_name,
      leitpunkt_to_name,
      lookup_platform,
      exact_then_fuzzy,
    )
  }

  let assert Ok(_) =
    handler
    |> mist.new
    |> mist.port(2345)
    |> mist.start_http

  process.sleep_forever()
}

/// Downloads the Betriebsstellenverzeichnis CSV.
///
/// A failed HTTP request is returned as `Error`; only parsing of the
/// hard-coded URL is asserted.
fn fetch_data() -> Result(String, hackney.Error) {
  let assert Ok(uri) =
    uri.parse(
      "https://download-data.deutschebahn.com/static/datasets/betriebsstellen/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv",
    )
  let assert Ok(request) = request.from_uri(uri)

  hackney.send(request)
  |> result.map(fn(response) {
    io.println("got response")
    // some ü are corrupted for some reason
    string.replace(response.body, "�", "ü")
  })
}

/// Splits `contents` into rows of fields separated by `sep`.
///
/// The first line is treated as the header and dropped; blank lines (such as
/// the one produced by a trailing newline) are skipped.
fn read_csv(contents: String, sep: String) -> List(List(String)) {
  contents
  // the file doesn't use quotes, so this is fine
  |> string.split(on: "\n")
  // drop CSV header
  |> list.drop(1)
  |> list.filter(fn(line) { string.trim(line) != "" })
  |> list.map(fn(line) { string.split(line, on: sep) })
}