From 87689d245aa57d1da97ff4a431eb3ece58e8304c Mon Sep 17 00:00:00 2001 From: stuebinm Date: Sun, 12 Jun 2022 22:02:45 +0200 Subject: hacky converter script --- Readme.org | 6 ++ fahrplan.rkt | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 Readme.org create mode 100755 fahrplan.rkt diff --git a/Readme.org b/Readme.org new file mode 100644 index 0000000..fa557d3 --- /dev/null +++ b/Readme.org @@ -0,0 +1,6 @@ +#+TITLE: excel to gtfs converter + +a hacky script that converts the idiosyncratic Excel documents used by Ilztalbahn +to plan train timetables into GTFS (assuming they've been exported to csv first) + +might eventually rewrite this mess in Gauche Scheme (or Idris2, if I'm feeling funny) diff --git a/fahrplan.rkt b/fahrplan.rkt new file mode 100755 index 0000000..07f2b08 --- /dev/null +++ b/fahrplan.rkt @@ -0,0 +1,184 @@ +#!/usr/bin/env racket +#lang racket/base + +(define input-file "fahrplan-2022.csv") +(define gtfs-prefix "gtfs-template/") +(define out-prefix "gtfs/") + + +(require racket/string + racket/function + racket/list + racket/match + racket/dict) + +(require csv-reading + csv-writing) + + +(define csvreader + (make-csv-reader-maker '((seperator-chars #\,) + (strip-leading-whitespace . #t) + (strip-trailing-whitespace . #t)))) + +(define (read-csv path) + (csv->list (csvreader (open-input-file path)))) + + +;; the fahrplan +(define fahrplan (read-csv input-file)) + +;; filter out empty lines since they're annoying +(define gtfs-stops + (filter (curry (compose1 not equal?) '("")) + (read-csv (string-append gtfs-prefix "stops.txt")))) + +;; convert a long station name (from the input) +;; to an id used in the gtfs +(define (station-id longname) + (let* ((header (car gtfs-stops)) + (stop-id-col (index-of header "stop_id")) + (stop-name-col (index-of header "stop_name")) + (lines (filter + (lambda (line) + (equal? longname (list-ref line stop-name-col))) + gtfs-stops))) + (if (empty? lines) + (display (format "unknown station name \"~a\"\n" longname)) + (list-ref (car lines) stop-id-col)))) + +;; fahrplan, but as a vector +(define fahrplanvec + (list->vector (map list->vector fahrplan))) + +;; a single cell in the csv +(define (fp-cell i j) + (vector-ref (vector-ref fahrplanvec i) j)) + +;; does anything in the list fullfil pred? +(define (any pred xs) + (foldr (lambda (a b) (or a b)) #f (map pred xs))) + + +;; transpose a table +(define (transpose xss) + (apply map list xss)) + +(define planfahr + (transpose fahrplan)) + +(define stations-column + (index-where planfahr (curry any (lambda (entry) (string-contains? entry "Fahrt"))))) + +(define stations + (list-ref planfahr stations-column)) + +(define fahrten-starts + (indexes-where (list-ref planfahr stations-column) (curry equal? "Fahrt"))) + +;; which line ranges could contain stops of the same trips? +(define stop-ranges + (drop-right + (foldr (lambda (x acc) + (cons (range x (caar acc)) acc)) + `((,(length stations))) + fahrten-starts) + 1)) + +;; does this string look like a station name? +(define (station? str) + (or (string-contains? str " ab") (string-contains? str " an"))) + +;; list of trips, along with trip numbers. +;; "primitive" because stations have not been merged yet, +;; i.e. " an" and " ab"-stops are still seperate +(define trips-primitive + ;; this append is here to flatten the list a single level + (append* + '() + ;; map over all columns behind the one that contains station names + (filter-map + (lambda (col) + (let ((stops (filter-map + (lambda (chunk) + (let ((number (fp-cell (car chunk) col))) + ; does this column contain a train number? + (if (equal? "" number) + #f + ; extract all lines that look like they contain stations + `(,number + ,(filter-map + (lambda (line) + (if (station? (list-ref stations line)) + `(,(list-ref stations line) + ,(list-ref (list-ref planfahr col) line)) + #f)) + chunk))))) + stop-ranges))) + ; filter out lines that don't define any stops + (if (equal? '() stops) + #f + stops))) + (range (add1 stations-column) (length planfahr))))) + + +(define (stop-description->station desc) + (string-replace (string-replace desc " ab" "")" an" "")) + +;; format time in a gtfs-suitable format +;; (for now just assumes that it's given times in xx:xx format) +(define (format-time time) + (string-append time ":00")) + +(define (stop-time-set dict name stop-time number seq kind) + (dict-update dict name + (lambda (old) + (match old + [(list num arr dep id seq) + (match kind + ['arrival `(,num ,stop-time ,dep ,id ,seq)] + ['departure `(,num ,arr ,stop-time ,id ,seq)])])) + `(,number ,stop-time ,stop-time ,(station-id name) ,seq))) + +;; extracts gtfs-like stops from an internal trip list +;; as contained in primitive-trips +(define (primitive-trip->gtfs-stop-times trip) + (let ((number (car trip)) + (stops (car (cdr trip)))) + (sort + (dict-values + (car (foldl (lambda (s acc) + (let ((name (stop-description->station (car s))) + (time (format-time (cadr s)))) + (cond [(string-contains? (car s) " an") + `(,(stop-time-set (car acc) name time number (cdr acc) 'arrival) + . ,(add1 (cdr acc)))] + [(string-contains? (car s) " ab") + `(,(stop-time-set (car acc) name time number (cdr acc) 'departure) + . ,(add1 (cdr acc)))]))) + `(,#hash() . 0) + stops))) + (lambda (a b) (<= (fifth a) (fifth b)))))) + +;; content of gtfs stop_times.txt +(define gtfs-stop-times + (append* + '((trip_id arrival_time departure_time stop_id stop_sequence)) + (map primitive-trip->gtfs-stop-times trips-primitive))) + + + +(define (write-csv table filename) + (call-with-output-file + (string-append out-prefix filename) + #:exists 'replace + (lambda (port) (display-table table port)))) + + + +(if (not (directory-exists? out-prefix)) + (make-directory out-prefix) + 'nil) + +(write-csv gtfs-stop-times "stop_times.txt") +(write-csv gtfs-stops "stops.txt") -- cgit v1.2.3