summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorstuebinm2022-06-12 22:02:45 +0200
committerstuebinm2022-06-12 22:02:45 +0200
commit87689d245aa57d1da97ff4a431eb3ece58e8304c (patch)
treed7192ae6d0d4cc6ca74de8dd2fbd318a7caca355
hacky converter script
-rw-r--r--Readme.org6
-rwxr-xr-xfahrplan.rkt184
2 files changed, 190 insertions, 0 deletions
diff --git a/Readme.org b/Readme.org
new file mode 100644
index 0000000..fa557d3
--- /dev/null
+++ b/Readme.org
@@ -0,0 +1,6 @@
+#+TITLE: excel to gtfs converter
+
+a hacky script that converts the idiosyncratic Excel documents used by Ilztalbahn
+to plan train timetables into GTFS (assuming they've been exported to csv first)
+
+might eventually rewrite this mess in Gauche Scheme (or Idris2, if I'm feeling funny)
diff --git a/fahrplan.rkt b/fahrplan.rkt
new file mode 100755
index 0000000..07f2b08
--- /dev/null
+++ b/fahrplan.rkt
@@ -0,0 +1,184 @@
+#!/usr/bin/env racket
+#lang racket/base
+
+(define input-file "fahrplan-2022.csv")
+(define gtfs-prefix "gtfs-template/")
+(define out-prefix "gtfs/")
+
+
+(require racket/string
+ racket/function
+ racket/list
+ racket/match
+ racket/dict)
+
+(require csv-reading
+ csv-writing)
+
+
+(define csvreader
+ (make-csv-reader-maker '((seperator-chars #\,)
+ (strip-leading-whitespace . #t)
+ (strip-trailing-whitespace . #t))))
+
+(define (read-csv path)
+ (csv->list (csvreader (open-input-file path))))
+
+
+;; the fahrplan
+(define fahrplan (read-csv input-file))
+
+;; filter out empty lines since they're annoying
+(define gtfs-stops
+ (filter (curry (compose1 not equal?) '(""))
+ (read-csv (string-append gtfs-prefix "stops.txt"))))
+
+;; convert a long station name (from the input)
+;; to an id used in the gtfs
+(define (station-id longname)
+ (let* ((header (car gtfs-stops))
+ (stop-id-col (index-of header "stop_id"))
+ (stop-name-col (index-of header "stop_name"))
+ (lines (filter
+ (lambda (line)
+ (equal? longname (list-ref line stop-name-col)))
+ gtfs-stops)))
+ (if (empty? lines)
+ (display (format "unknown station name \"~a\"\n" longname))
+ (list-ref (car lines) stop-id-col))))
+
+;; fahrplan, but as a vector
+(define fahrplanvec
+ (list->vector (map list->vector fahrplan)))
+
+;; a single cell in the csv
+(define (fp-cell i j)
+ (vector-ref (vector-ref fahrplanvec i) j))
+
+;; does anything in the list fullfil pred?
+(define (any pred xs)
+ (foldr (lambda (a b) (or a b)) #f (map pred xs)))
+
+
+;; transpose a table
+(define (transpose xss)
+ (apply map list xss))
+
+(define planfahr
+ (transpose fahrplan))
+
+(define stations-column
+ (index-where planfahr (curry any (lambda (entry) (string-contains? entry "Fahrt")))))
+
+(define stations
+ (list-ref planfahr stations-column))
+
+(define fahrten-starts
+ (indexes-where (list-ref planfahr stations-column) (curry equal? "Fahrt")))
+
+;; which line ranges could contain stops of the same trips?
+(define stop-ranges
+ (drop-right
+ (foldr (lambda (x acc)
+ (cons (range x (caar acc)) acc))
+ `((,(length stations)))
+ fahrten-starts)
+ 1))
+
+;; does this string look like a station name?
+(define (station? str)
+ (or (string-contains? str " ab") (string-contains? str " an")))
+
+;; list of trips, along with trip numbers.
+;; "primitive" because stations have not been merged yet,
+;; i.e. " an" and " ab"-stops are still seperate
+(define trips-primitive
+ ;; this append is here to flatten the list a single level
+ (append*
+ '()
+ ;; map over all columns behind the one that contains station names
+ (filter-map
+ (lambda (col)
+ (let ((stops (filter-map
+ (lambda (chunk)
+ (let ((number (fp-cell (car chunk) col)))
+ ; does this column contain a train number?
+ (if (equal? "" number)
+ #f
+ ; extract all lines that look like they contain stations
+ `(,number
+ ,(filter-map
+ (lambda (line)
+ (if (station? (list-ref stations line))
+ `(,(list-ref stations line)
+ ,(list-ref (list-ref planfahr col) line))
+ #f))
+ chunk)))))
+ stop-ranges)))
+ ; filter out lines that don't define any stops
+ (if (equal? '() stops)
+ #f
+ stops)))
+ (range (add1 stations-column) (length planfahr)))))
+
+
+(define (stop-description->station desc)
+ (string-replace (string-replace desc " ab" "")" an" ""))
+
+;; format time in a gtfs-suitable format
+;; (for now just assumes that it's given times in xx:xx format)
+(define (format-time time)
+ (string-append time ":00"))
+
+(define (stop-time-set dict name stop-time number seq kind)
+ (dict-update dict name
+ (lambda (old)
+ (match old
+ [(list num arr dep id seq)
+ (match kind
+ ['arrival `(,num ,stop-time ,dep ,id ,seq)]
+ ['departure `(,num ,arr ,stop-time ,id ,seq)])]))
+ `(,number ,stop-time ,stop-time ,(station-id name) ,seq)))
+
+;; extracts gtfs-like stops from an internal trip list
+;; as contained in primitive-trips
+(define (primitive-trip->gtfs-stop-times trip)
+ (let ((number (car trip))
+ (stops (car (cdr trip))))
+ (sort
+ (dict-values
+ (car (foldl (lambda (s acc)
+ (let ((name (stop-description->station (car s)))
+ (time (format-time (cadr s))))
+ (cond [(string-contains? (car s) " an")
+ `(,(stop-time-set (car acc) name time number (cdr acc) 'arrival)
+ . ,(add1 (cdr acc)))]
+ [(string-contains? (car s) " ab")
+ `(,(stop-time-set (car acc) name time number (cdr acc) 'departure)
+ . ,(add1 (cdr acc)))])))
+ `(,#hash() . 0)
+ stops)))
+ (lambda (a b) (<= (fifth a) (fifth b))))))
+
+;; content of gtfs stop_times.txt
+(define gtfs-stop-times
+ (append*
+ '((trip_id arrival_time departure_time stop_id stop_sequence))
+ (map primitive-trip->gtfs-stop-times trips-primitive)))
+
+
+
+(define (write-csv table filename)
+ (call-with-output-file
+ (string-append out-prefix filename)
+ #:exists 'replace
+ (lambda (port) (display-table table port))))
+
+
+
+(if (not (directory-exists? out-prefix))
+ (make-directory out-prefix)
+ 'nil)
+
+(write-csv gtfs-stop-times "stop_times.txt")
+(write-csv gtfs-stops "stops.txt")