hacky converter script

author: stuebinm 2022-06-12 22:02:45 +0200
committer: stuebinm 2022-06-12 22:02:45 +0200
commit: 87689d245aa57d1da97ff4a431eb3ece58e8304c (patch)
tree: d7192ae6d0d4cc6ca74de8dd2fbd318a7caca355
2 files changed, 190 insertions, 0 deletions
diff --git a/Readme.org b/Readme.org
new file mode 100644
index 0000000..fa557d3
--- /dev/null
+++ b/Readme.org
@@ -0,0 +1,6 @@
+#+TITLE: excel to gtfs converter
+
+a hacky script that converts the idiosyncratic Excel documents used by Ilztalbahn
+to plan train timetables into GTFS (assuming they've been exported to csv first)
+
+might eventually rewrite this mess in Gauche Scheme (or Idris2, if I'm feeling funny)
diff --git a/fahrplan.rkt b/fahrplan.rkt
new file mode 100755
index 0000000..07f2b08
--- /dev/null
+++ b/fahrplan.rkt
@@ -0,0 +1,184 @@
+#!/usr/bin/env racket
+#lang racket/base
+
+(define input-file "fahrplan-2022.csv")
+(define gtfs-prefix "gtfs-template/")
+(define out-prefix "gtfs/")
+
+
+(require racket/string
+         racket/function
+         racket/list
+         racket/match
+         racket/dict)
+
+(require csv-reading
+         csv-writing)
+
+
+(define csvreader
+  (make-csv-reader-maker '((seperator-chars #\,)
+                          (strip-leading-whitespace . #t)
+                          (strip-trailing-whitespace . #t))))
+
+(define (read-csv path)
+  (csv->list (csvreader (open-input-file path))))
+
+
+;; the fahrplan
+(define fahrplan (read-csv input-file))
+
+;; filter out empty lines since they're annoying
+(define gtfs-stops
+  (filter (curry (compose1 not equal?) '(""))
+          (read-csv (string-append gtfs-prefix "stops.txt"))))
+
+;; convert a long station name (from the input)
+;; to an id used in the gtfs
+(define (station-id longname)
+  (let* ((header (car gtfs-stops))
+        (stop-id-col (index-of header "stop_id"))
+        (stop-name-col (index-of header "stop_name"))
+        (lines (filter
+                (lambda (line)
+                  (equal? longname (list-ref line stop-name-col)))
+                gtfs-stops)))
+    (if (empty? lines)
+        (display (format "unknown station name \"~a\"\n" longname))
+        (list-ref (car lines) stop-id-col))))
+
+;; fahrplan, but as a vector
+(define fahrplanvec
+  (list->vector (map list->vector fahrplan)))
+
+;; a single cell in the csv
+(define (fp-cell i j)
+  (vector-ref (vector-ref fahrplanvec i) j))
+
+;; does anything in the list fullfil pred?
+(define (any pred xs)
+  (foldr (lambda (a b) (or a b)) #f (map pred xs)))
+
+
+;; transpose a table
+(define (transpose xss)
+  (apply map list xss))
+
+(define planfahr
+  (transpose fahrplan))
+
+(define stations-column
+  (index-where planfahr (curry any (lambda (entry) (string-contains? entry "Fahrt")))))
+
+(define stations
+  (list-ref planfahr stations-column))
+
+(define fahrten-starts
+  (indexes-where (list-ref planfahr stations-column) (curry equal? "Fahrt")))
+
+;; which line ranges could contain stops of the same trips?
+(define stop-ranges
+  (drop-right
+   (foldr (lambda (x acc)
+            (cons (range x (caar acc)) acc))
+          `((,(length stations)))
+          fahrten-starts)
+   1))
+
+;; does this string look like a station name?
+(define (station? str)
+  (or (string-contains? str " ab") (string-contains? str " an")))
+
+;; list of trips, along with trip numbers.
+;; "primitive" because stations have not been merged yet,
+;; i.e. " an" and " ab"-stops are still seperate
+(define trips-primitive
+  ;; this append is here to flatten the list a single level
+  (append*
+   '()
+   ;; map over all columns behind the one that contains station names
+   (filter-map
+    (lambda (col)
+      (let ((stops (filter-map
+                    (lambda (chunk)
+                      (let ((number (fp-cell (car chunk) col)))
+                        ; does this column contain a train number?
+                        (if (equal? "" number)
+                            #f
+                            ; extract all lines that look like they contain stations
+                            `(,number
+                              ,(filter-map
+                                (lambda (line)
+                                  (if (station? (list-ref stations line))
+                                      `(,(list-ref stations line)
+                                        ,(list-ref (list-ref planfahr col) line))
+                                      #f))
+                                chunk)))))
+                    stop-ranges)))
+        ; filter out lines that don't define any stops
+        (if (equal? '() stops)
+            #f
+            stops)))
+    (range (add1 stations-column) (length planfahr)))))
+
+
+(define (stop-description->station desc)
+  (string-replace (string-replace desc " ab" "")" an" ""))
+
+;; format time in a gtfs-suitable format
+;; (for now just assumes that it's given times in xx:xx format)
+(define (format-time time)
+  (string-append time ":00"))
+
+(define (stop-time-set dict name stop-time number seq kind)
+  (dict-update dict name
+               (lambda (old)
+                 (match old
+                   [(list num arr dep id seq)
+                    (match kind
+                      ['arrival `(,num ,stop-time ,dep ,id ,seq)]
+                      ['departure `(,num ,arr ,stop-time ,id ,seq)])]))
+               `(,number ,stop-time ,stop-time ,(station-id name) ,seq)))
+
+;; extracts gtfs-like stops from an internal trip list
+;; as contained in primitive-trips
+(define (primitive-trip->gtfs-stop-times trip)
+  (let ((number (car trip))
+        (stops (car (cdr trip))))
+    (sort
+     (dict-values
+      (car (foldl (lambda (s acc)
+                    (let ((name (stop-description->station (car s)))
+                          (time (format-time (cadr s))))
+                      (cond [(string-contains? (car s) " an")
+                             `(,(stop-time-set (car acc) name time number (cdr acc) 'arrival)
+                               . ,(add1 (cdr acc)))]
+                            [(string-contains? (car s) " ab")
+                             `(,(stop-time-set (car acc) name time number (cdr acc) 'departure)
+                               . ,(add1 (cdr acc)))])))
+                  `(,#hash() . 0)
+                  stops)))
+     (lambda (a b) (<= (fifth a) (fifth b))))))
+
+;; content of gtfs stop_times.txt
+(define gtfs-stop-times
+  (append*
+   '((trip_id arrival_time departure_time stop_id stop_sequence))
+   (map primitive-trip->gtfs-stop-times trips-primitive)))
+
+
+
+(define (write-csv table filename)
+  (call-with-output-file
+    (string-append out-prefix filename)
+    #:exists 'replace
+    (lambda (port) (display-table table port))))
+
+
+
+(if (not (directory-exists? out-prefix))
+    (make-directory out-prefix)
+    'nil)
+
+(write-csv gtfs-stop-times "stop_times.txt")
+(write-csv gtfs-stops "stops.txt")
author	stuebinm	2022-06-12 22:02:45 +0200
committer	stuebinm	2022-06-12 22:02:45 +0200
commit	87689d245aa57d1da97ff4a431eb3ece58e8304c (patch)
tree	d7192ae6d0d4cc6ca74de8dd2fbd318a7caca355