summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/bahnhofname.gleam 430
1 files changed, 0 insertions, 430 deletions
diff --git a/src/bahnhofname.gleam b/src/bahnhofname.gleam
deleted file mode 100644
index fe3b10f..0000000
--- a/src/bahnhofname.gleam
+++ /dev/null
@@ -1,430 +0,0 @@
-import gleam/http/response.{Response}
-import gleam/http/request.{Request, get_header}
-import gleam/http.{Get}
-import gleam/bit_builder
-import gleam/erlang/process
-import gleam/erlang/atom
-import gleam/erlang/file
-import gleam/io
-import gleam/int
-import gleam/string
-import gleam/bit_string
-import gleam/list
-import gleam/map.{Map}
-import gleam/uri
-import gleam/hackney
-import gleam/pair.{swap}
-import gleam/result
-import mist
-
-// Host name for DS100 lookups; the request dispatcher treats it the same
-// as `ril100_domain`.
-const ds100_domain = "ds100.bahnhof.name"
-
-// Host name for Ril100 lookups; also used in the usage text and as the
-// default redirect target.
-const ril100_domain = "ril100.bahnhof.name"
-
-// Host name for Leitpunkt lookups.
-const leitpunkt_domain = "leitpunkt.bahnhof.name"
-
-// Bare domain shown in the usage and error texts.
-const domain = "bahnhof.name"
-
-// URL scheme prefix prepended to the domains above when building links.
-const proto = "https://"
-
-// Opaque handle to a Haystack index; values are only produced by the
-// external functions below.
-type Index
-
-// Opaque handle to a Haystack index field.
-type Field
-
-// Binds `Haystack.Index.new` (Elixir); the atom is handed over unchanged.
-@external(erlang, "Elixir.Haystack.Index", "new")
-fn index_new(a: atom.Atom) -> Index
-
-// Binds `Haystack.Index.ref`; `main` passes the "id" field here.
-@external(erlang, "Elixir.Haystack.Index", "ref")
-fn index_ref(a: Index, b: Field) -> Index
-
-// Binds `Haystack.Index.field`; `main` passes the "name" field here.
-@external(erlang, "Elixir.Haystack.Index", "field")
-fn index_field(a: Index, b: Field) -> Index
-
-// Binds `Haystack.Index.Field.term`.
-@external(erlang, "Elixir.Haystack.Index.Field", "term")
-fn field_term(a: String) -> Field
-
-// Binds `Haystack.Index.Field.new`.
-@external(erlang, "Elixir.Haystack.Index.Field", "new")
-fn field_new(a: String) -> Field
-
-// Binds `Haystack.Index.add`; `main` passes a list of maps with "id" and
-// "name" keys. The element type is left generic on the Gleam side.
-@external(erlang, "Elixir.Haystack.Index", "add")
-fn index_add(a: Index, b: List(a)) -> Index
-
-// Binds Elixir's `IO.inspect`. Typed as the identity function, so it can be
-// dropped into pipelines; used in this file for debug output.
-@external(erlang, "Elixir.IO", "inspect")
-pub fn inspect(a: a) -> a
-
-// Opaque handle to a Haystack query.
-type Query
-
-// Opaque handle to a Haystack query clause.
-type Clause
-
-// Opaque handle to a Haystack query expression.
-type Expression
-
-// Binds `Haystack.Query.new`.
-@external(erlang, "Elixir.Haystack.Query", "new")
-fn query_new() -> Query
-
-// Binds `Haystack.Query.clause`.
-@external(erlang, "Elixir.Haystack.Query", "clause")
-fn query_clause(a: Query, b: Clause) -> Query
-
-// Binds `Haystack.Query.run`. The result is typed as a list of atom-keyed
-// maps; `main` only reads the `ref` key of each entry.
-@external(erlang, "Elixir.Haystack.Query", "run")
-fn query_run(a: Query, b: Index) -> List(Map(atom.Atom, String))
-
-// Binds `Haystack.Query.Clause.new`; `main` always passes the atom `all`.
-@external(erlang, "Elixir.Haystack.Query.Clause", "new")
-fn clause_new(a: atom.Atom) -> Clause
-
-// Binds `Haystack.Query.Clause.expressions`.
-@external(erlang, "Elixir.Haystack.Query.Clause", "expressions")
-fn query_expressions(a: Clause, b: List(Expression)) -> Clause
-
-// Binds `Haystack.Query.Expression.new`; `main` passes the atom `match`
-// together with `field` and `term` entries.
-@external(erlang, "Elixir.Haystack.Query.Expression", "new")
-fn query_expression_new(
- a: atom.Atom,
- b: List(#(atom.Atom, String)),
-) -> Expression
-
-// Binds `Haystack.Tokenizer.tokenize`. The result is typed as a list of
-// atom-keyed maps; `main` only reads the `v` key of each token.
-@external(erlang, "Elixir.Haystack.Tokenizer", "tokenize")
-fn tokenize(a: String) -> List(Map(atom.Atom, String))
-
-// Selects which identifier scheme a lookup works with; `main` uses it to
-// pick the matching index and the matching pair of lookup maps.
-type IdKind {
- DS100
- Leitpunkt
-}
-
-// Outcome of a name lookup.
-// - `Exact(t)`: the search term was found directly; `main` stores the id here.
-// - `Fuzzy(t, t)`: found via the search index; `main` stores the matched
-//   station name first and its id second.
-// - `Failed`: nothing matched.
-type Matched(t) {
- Exact(t)
- Fuzzy(t, t)
- Failed
-}
-
-/// Decodes a percent-encoded URL path and turns every `_` into a space.
-///
-/// The input comes straight from the request path, so it must never crash:
-/// - a `%` that is not followed by exactly two hex digits is kept literally,
-/// - if the decoded bytes are not valid UTF-8, the undecoded input is used.
-///
-/// The underscore replacement is applied in both cases.
-fn unpercent(encoded: String) -> String {
-  // `string.split` yields at least one element; the empty-list arm only
-  // exists to keep the match total.
-  let #(head, chunks) = case string.split(encoded, on: "%") {
-    [head, ..chunks] -> #(head, chunks)
-    [] -> #("", [])
-  }
-
-  let decoded =
-    chunks
-    |> list.map(fn(chunk) {
-      let hex = string.slice(chunk, at_index: 0, length: 2)
-      // `int.base_parse` accepts a leading sign, which is not a valid escape.
-      let signed = string.starts_with(hex, "+") || string.starts_with(hex, "-")
-      case signed, string.length(hex), int.base_parse(hex, 16) {
-        // Two unsigned hex digits always fit into one byte.
-        False, 2, Ok(byte) -> <<byte:8, string.drop_left(chunk, 2):utf8>>
-        // Not an escape sequence: restore the `%` that the split removed.
-        _, _, _ -> bit_string.from_string("%" <> chunk)
-      }
-    })
-    |> list.prepend(bit_string.from_string(head))
-    |> bit_string.concat
-    |> bit_string.to_string
-
-  let text = case decoded {
-    Ok(text) -> text
-    Error(_) -> encoded
-  }
-  string.replace(text, "_", " ")
-}
-
-/// Exact-match lookup of `query` in `lookup`; no fuzzy matching involved.
-///
-/// Yields `#(200, value)` on a hit and `#(404, "??")` otherwise.
-fn lookup_exact(query: String, lookup: Map(String, String)) -> #(Int, String) {
-  lookup
-  |> map.get(query)
-  |> result.map(fn(hit) { #(200, hit) })
-  |> result.unwrap(#(404, "??"))
-}
-
-/// Resolves `query` through the supplied `fuzzy` matcher and converts the
-/// outcome into a `#(status, text)` pair:
-/// `Exact` becomes 200, `Fuzzy` becomes 302 carrying its first component,
-/// and `Failed` becomes `#(404, "??")`.
-fn lookup_fuzzy(
-  query: String,
-  kind: IdKind,
-  fuzzy: fn(String, IdKind) -> Matched(String),
-) -> #(Int, String) {
-  let outcome = fuzzy(query, kind)
-  case outcome {
-    Failed -> #(404, "??")
-    Fuzzy(first, _) -> #(302, first)
-    Exact(hit) -> #(200, hit)
-  }
-}
-
-/// Keeps `res` when its status is 200, otherwise evaluates `fallback`.
-/// The chosen value is passed through `inspect` before it is returned.
-fn if_not(res: #(Int, t), fallback: fn() -> #(Int, t)) -> #(Int, t) {
-  let chosen = case res.0 == 200 {
-    True -> res
-    False -> fallback()
-  }
-  inspect(chosen)
-}
-
-
-/// Turns one HTTP request into the response of the lookup service.
-///
-/// - `ds100_to_name` / `leitpunkt_to_name`: exact-match tables keyed by code.
-/// - `lookup_platform`: renders the platform listing for a code.
-/// - `fuzzy`: name matcher used when the exact tables have no entry.
-///
-/// Every response carries the `x-data-source`, `x-sources-at` and
-/// `content-type` headers; a 302 response also gets a `location` header
-/// whose value is the response text.
-fn lookup_station(
- request: Request(t),
- ds100_to_name: Map(String, String),
- leitpunkt_to_name: Map(String, String),
- lookup_platform: fn(String) -> String,
- fuzzy: fn(String, IdKind) -> Matched(String),
-) -> Response(mist.ResponseData) {
- // Each arm yields #(#(status, body_text), is_html).
- let #(#(code, text), is_html) = case request {
- // blackhole favicon.ico requests instead of using the index
- Request(method: Get, path: "/favicon.ico", ..) -> #(#(404, ""), False)
- // Root and /help both answer with the plain-text usage overview.
- Request(method: Get, path: "/help", ..)
- | Request(method: Get, path: "/", ..) -> #(#(
- 200,
- "ril100 → Name: " <> proto <> ril100_domain <> "/HG\n" <>
- "Name → ril100: " <> proto <> ril100_domain <> "/Göttingen\n\n" <>
- "Leitpunkt → Name: " <> proto <> leitpunkt_domain <> "/GOE\n" <>
- "Name → Leitpunkt: " <> proto <> leitpunkt_domain <> "/Göttingen\n\n" <>
- "Fuzzy:" <> proto <> domain <> "/...",
- ), False)
- // Any other GET path is treated as a lookup query.
- Request(method: Get, path: "/" <> path, ..) -> {
- let raw_query = unpercent(path)
- // A trailing platform/track marker switches to the platform listing.
- let show_platforms = string.ends_with(raw_query, "/gleis")
- || string.ends_with(raw_query, "/bahnsteig")
- || string.ends_with(raw_query, "/platforms")
- || string.ends_with(raw_query, "/tracks")
- || string.ends_with(raw_query, "/platform")
- || string.ends_with(raw_query, "/track")
- // NOTE(review): string.replace removes these markers anywhere in the
- // query, not only at the end, and also when show_platforms is False.
- let query = raw_query
- |> string.replace("/gleis","")
- |> string.replace("/bahnsteig","")
- |> string.replace("/platforms","")
- |> string.replace("/tracks","")
- |> string.replace("/platform","")
- |> string.replace("/track","")
- // Dispatch on the platform flag and on the x-forwarded-host header.
- case #(show_platforms, get_header(request, "x-forwarded-host")) {
- // Plain lookup on the Leitpunkt host: exact table first, then fuzzy.
- #(False, Ok(domain)) if domain == leitpunkt_domain ->
- query
- |> lookup_exact(leitpunkt_to_name)
- |> if_not(fn() { lookup_fuzzy(query, Leitpunkt, fuzzy) })
- |> pair.new(False)
- // Plain lookup on the ril100/ds100 hosts: exact table first, then fuzzy.
- #(False, Ok(domain)) if domain == ril100_domain || domain == ds100_domain ->
- query
- |> lookup_exact(ds100_to_name)
- |> if_not(fn() { lookup_fuzzy(query, DS100, fuzzy) })
- |> pair.new(False)
- // Platform listing on the Leitpunkt host: a Leitpunkt code is first
- // translated to its name, then that name is resolved with kind DS100.
- #(True, Ok(domain)) if domain == leitpunkt_domain -> {
- let query = case map.get(leitpunkt_to_name, query) {
- Ok(name) -> name
- _ -> query
- }
- case fuzzy(query, DS100) {
- Exact(code) -> #(200, lookup_platform(code))
- Fuzzy(_, code) -> #(200, lookup_platform(code))
- _ -> #(404, "")
- } |> pair.new(True)
- }
- // Platform listing on the ril100/ds100 hosts: a known code is used
- // as-is, anything else goes through the matcher.
- #(True, Ok(domain)) if domain == ril100_domain || domain == ds100_domain ->
- case lookup_exact(query, ds100_to_name) {
- #(200,_) -> #(200, lookup_platform(query))
- _ -> case fuzzy(query, DS100) {
- Exact(code) -> #(200, lookup_platform(code))
- Fuzzy(_, code) -> #(200, lookup_platform(code))
- _ -> #(404, "")
- }
- } |> pair.new(True)
- // Any other or missing host: redirect to the Leitpunkt host only when
- // the query is a Leitpunkt code but not a DS100 code; otherwise
- // redirect to the ril100 host. The undecoded path is reused.
- _ -> {
- let by_ds100 = lookup_exact(query, ds100_to_name)
- let by_lp = lookup_exact(query, leitpunkt_to_name)
- case #(by_ds100.0, by_lp.0) {
- #(200, _) -> #(302, proto <> ril100_domain <> "/" <> path)
- #(_, 200) -> #(302, proto <> leitpunkt_domain <> "/" <> path)
- _ -> #(302, proto <> ril100_domain <> "/" <> path)
- } |> pair.new(False)
- }
- }
- }
- // Everything that did not match a GET arm above.
- _ -> #(#(404, "intended usage is e.g. curl " <> proto <> domain <> "/FF"), False)
- }
- let body = text
- |> bit_builder.from_string
- |> mist.Bytes
-
- // Only the platform listings are HTML; everything else is plain text.
- let content_type = case is_html {
- True -> "text/html; charset=utf8"
- False -> "text/plain; charset=utf8"
- }
-
- response.new(code)
- |> response.prepend_header(
- "x-data-source",
- "https://data.deutschebahn.com/dataset/data-betriebsstellen.html",
- )
- |> response.prepend_header(
- "x-sources-at",
- "https://stuebinm.eu/git/bahnhof.name",
- )
- |> response.prepend_header("content-type", content_type)
- // For redirects the body text doubles as the location header value.
- |> fn(a) {
- case code == 302 {
- True -> response.prepend_header(a, "location", text)
- _ -> a
- }
- }
- |> response.set_body(body)
-}
-
-/// Entry point: loads the station data files, builds the two Haystack
-/// indices and serves lookups over HTTP on port 2345.
-///
-/// Start-up crashes (via `let assert`) when a data file cannot be read or
-/// the server cannot be started. Request handling itself does not assert
-/// on the shape of the data rows.
-pub fn main() {
-  let assert Ok(bahn_ril100) =
-    file.read("data/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv")
-
-  // #(name, ds100) pairs; rows with fewer than three columns are skipped.
-  let ds100s =
-    read_csv(bahn_ril100, ";")
-    |> list.filter_map(fn(fields) {
-      case fields {
-        [_, ds100, name, ..] -> Ok(#(name, ds100))
-        _ -> Error(fields)
-      }
-    })
-  let assert Ok(leitpunkte_raw) = file.read("data/leitpunkte.csv")
-  // #(name, leitpunkt) pairs; only rows with exactly three columns are kept.
-  let leitpunkte =
-    read_csv(leitpunkte_raw, ";")
-    |> list.filter_map(fn(fields) {
-      case fields {
-        [lp, name, _ds100] -> Ok(#(name, lp))
-        _ -> Error(fields)
-      }
-    })
-  let assert Ok(platforms_raw) = file.read("data/platforms.tsv")
-  let platforms = read_csv(platforms_raw, "\t")
-
-  // Lookup tables in both directions for both identifier schemes.
-  let name_to_ds100 = map.from_list(ds100s)
-  let name_to_leitpunkt = map.from_list(leitpunkte)
-  let ds100_to_name = map.from_list(list.map(ds100s, swap))
-  let leitpunkt_to_name = map.from_list(list.map(leitpunkte, swap))
-
-  // Search indices: the id is the document ref, the name is the indexed field.
-  let ds100index =
-    index_new(atom.create_from_string("ds100"))
-    |> index_ref(field_term("id"))
-    |> index_field(field_new("name"))
-    |> index_add(
-      list.map(ds100s, fn(entry) {
-        let #(name, ds100) = entry
-        map.from_list([#("id", ds100), #("name", name)])
-      }),
-    )
-  let leitpunkt_index =
-    index_new(atom.create_from_string("leitpunkt"))
-    |> index_ref(field_term("id"))
-    |> index_field(field_new("name"))
-    |> index_add(
-      list.map(leitpunkte, fn(entry) {
-        let #(name, leitpunkt) = entry
-        map.from_list([#("id", leitpunkt), #("name", name)])
-      }),
-    )
-
-  // Atoms used by the Haystack calls; created once instead of per request.
-  let ref = atom.create_from_string("ref")
-  let match = atom.create_from_string("match")
-  let field = atom.create_from_string("field")
-  let term = atom.create_from_string("term")
-  let all = atom.create_from_string("all")
-  let v = atom.create_from_string("v")
-
-  // Full-text search over the station names; returns the refs of all hits.
-  let fuzzy = fn(searchterm: String, kind: IdKind) -> List(String) {
-    let index = case kind {
-      DS100 -> ds100index
-      Leitpunkt -> leitpunkt_index
-    }
-    // One `match` expression on the name field per token of the search term.
-    let expressions =
-      tokenize(inspect(searchterm))
-      |> list.filter_map(fn(token) { map.get(token, v) })
-      |> list.map(fn(token) {
-        query_expression_new(match, [#(field, "name"), #(term, token)])
-      })
-    // Runs an `all` clause over the given expressions and collects the refs.
-    let run = fn(exprs: List(Expression)) -> List(String) {
-      query_new()
-      |> query_clause(query_expressions(clause_new(all), exprs))
-      |> query_run(index)
-      |> list.filter_map(fn(hit) { map.get(hit, ref) })
-    }
-
-    let matches = run(expressions)
-    inspect(matches)
-    case list.length(matches) > 5 {
-      True -> {
-        // Too many hits: retry with an extra "hbf" term and keep the
-        // narrowed result only if it is non-empty.
-        let hbf =
-          query_expression_new(match, [#(field, "name"), #(term, "hbf")])
-        case run([hbf, ..expressions]) {
-          [] -> matches
-          narrow -> narrow
-        }
-      }
-      False -> matches
-    }
-  }
-
-  // Tries the exact name table first and falls back to the search index.
-  let exact_then_fuzzy = fn(searchterm: String, kind: IdKind) -> Matched(String) {
-    let #(stations, ids) = case kind {
-      DS100 -> #(name_to_ds100, ds100_to_name)
-      Leitpunkt -> #(name_to_leitpunkt, leitpunkt_to_name)
-    }
-    case map.get(stations, searchterm) {
-      Ok(id) -> Exact(id)
-      _ -> {
-        // The index returns refs; upper-case them and map them back to names.
-        let results =
-          fuzzy(searchterm, kind)
-          |> list.filter_map(fn(res) { map.get(ids, string.uppercase(res)) })
-        case results {
-          // Only the first hit is used, however many there are.
-          [name, ..] ->
-            case map.get(stations, name) {
-              Ok(id) -> Fuzzy(name, id)
-              Error(_) -> Failed
-            }
-          [] -> Failed
-        }
-      }
-    }
-  }
-
-  // Renders all platform rows of a station as HTML links, one per line.
-  // Rows that do not have exactly four columns are skipped rather than
-  // crashing the request.
-  // NOTE(review): `info` is inserted into the HTML unescaped; confirm that
-  // the TSV content is trusted.
-  let lookup_platform = fn(ds100: String) -> String {
-    inspect(ds100)
-    platforms
-    |> list.filter_map(fn(row) {
-      case row {
-        [code, osmid, osmtype, info] if code == ds100 ->
-          Ok(
-            "<a href=\"https://osm.org/" <> osmtype <> "/" <> osmid <> "\">"
-            <> info
-            <> "</a>",
-          )
-        _ -> Error(Nil)
-      }
-    })
-    |> string.join("<br>\n")
-    |> inspect
-  }
-
-  io.println("compiled indices, starting server …")
-
-  let assert Ok(_) =
-    fn(req: Request(mist.Connection)) -> Response(mist.ResponseData) {
-      lookup_station(
-        req,
-        ds100_to_name,
-        leitpunkt_to_name,
-        lookup_platform,
-        exact_then_fuzzy,
-      )
-    }
-    |> mist.new
-    |> mist.port(2345)
-    |> mist.start_http
-
-  process.sleep_forever()
-}
-
-/// Downloads the DB Netz Betriebsstellenverzeichnis CSV.
-///
-/// Returns the response body with every U+FFFD replacement character turned
-/// into `ü`, or the `hackney.Error` when the request fails. Transport errors
-/// are returned to the caller instead of crashing.
-fn fetch_data() -> Result(String, hackney.Error) {
-  // The URL is a literal, so failing to parse it is a programming error.
-  let assert Ok(uri) =
-    uri.parse(
-      "https://download-data.deutschebahn.com/static/datasets/betriebsstellen/DBNetz-Betriebsstellenverzeichnis-Stand2021-10.csv",
-    )
-  let assert Ok(request) = request.from_uri(uri)
-
-  hackney.send(request)
-  |> result.map(fn(response) {
-    // Logged only once a response has actually arrived.
-    io.println("got response")
-    // some ü are corrupted for some reason
-    string.replace(response.body, "�", "ü")
-  })
-}
-
-/// Splits separator-delimited text into rows of fields.
-///
-/// The first line is treated as the header and dropped. Blank lines (such
-/// as the one produced by a trailing newline) are skipped, and Windows line
-/// endings are normalised so no `\r` ends up in the last field.
-/// Quoting is not supported.
-fn read_csv(contents: String, sep: String) -> List(List(String)) {
-  contents
-  |> string.replace("\r\n", "\n")
-  // the file doesn't use quotes, so this is fine
-  |> string.split(on: "\n")
-  // drop CSV header
-  |> list.drop(1)
-  |> list.filter(fn(line) { line != "" })
-  |> list.map(fn(line) { string.split(line, on: sep) })
-}