aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorstuebinm2022-06-05 00:03:43 +0200
committerstuebinm2022-06-05 00:03:43 +0200
commit35bb210c9aced65795ba09a5ed30e9d28a89dc3b (patch)
treeb38e9c661b9af798beab700636bc16670a997808
initial hacking
-rw-r--r--.gitignore1
-rw-r--r--CHANGELOG.md5
-rw-r--r--app/Main.hs308
-rw-r--r--haskell-gtfs.cabal44
-rw-r--r--hie.yaml4
-rw-r--r--shell.nix5
6 files changed, 367 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b5e3679
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+dist-newstyle/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..1df15d0
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Revision history for haskell-gtfs
+
+## 0.1.0.0 -- YYYY-mm-dd
+
+* First version. Released on an unsuspecting world.
diff --git a/app/Main.hs b/app/Main.hs
new file mode 100644
index 0000000..31c9882
--- /dev/null
+++ b/app/Main.hs
@@ -0,0 +1,308 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE StandaloneKindSignatures #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE RecordWildCards #-}
+
+
+module Main where
+
+
+import qualified Data.ByteString.Lazy as LB
+import qualified Data.ByteString as BS
+import qualified Data.Csv as CSV
+import Data.Csv ((.:))
+import qualified Codec.Archive.Zip as Zip
+import qualified Data.Vector as V
+import Data.Vector (Vector)
+import Text.Regex.TDFA ( (=~) )
+import Data.Text (Text)
+import Fmt ( (+|), (|+) )
+import Data.Kind (Type)
+import Data.Maybe (fromMaybe, fromJust)
+import Data.Functor ((<&>))
+import qualified Data.Time.Calendar.OrdinalDate as Day
+import Data.Time.Calendar.OrdinalDate (Day)
+import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid)
+import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek)
+import Data.Time.Calendar.WeekDate (DayOfWeek(..))
+
+
+newtype Time = Time { toSeconds :: Int }
+
+instance CSV.FromField Time where
+ parseField f = do
+ text :: String <- CSV.parseField f
+ let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text)
+ :: (String, String, String, [String])
+ case subs of
+ [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss
+ _ -> fail $ "encountered an invalid date: " <> text
+
+instance Show Time where
+ show (Time seconds) = ""
+ +|seconds `div` 3600|+":"
+ +|(seconds `mod` 3600) `div` 60|+":"
+ +|seconds `mod` 60|+""
+
+instance CSV.FromField Day where
+ parseField f = do
+ text :: String <- CSV.parseField f
+ let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text)
+ :: (String, String, String, [String])
+ case subs of
+ [yyyy,mm,dd] -> do
+ let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd)
+ pure $ Day.fromOrdinalDate (read yyyy) dayOfYear
+ _ -> fail $ "invalid date encountered: " <> show f
+
+
+
+data Depth = Shallow | Deep
+type Switch :: Depth -> Type -> Type -> Type
+type family Switch c a b where
+ Switch Deep a b = a
+ Switch Shallow a b = b
+
+type StationID = Text
+
+-- | This is what's called a Stop in GTFS
+data Station = Station
+ { stationId :: StationID
+ , stationName :: Text
+ , stationLat :: Float
+ , stationLon :: Float
+ } deriving Show
+
+-- | This is what's called a stop time in GTFS
+data Stop (deep :: Depth) = Stop
+ { stopTrip :: Int
+ , stopArrival :: Time
+ , stopDeparture :: Time
+ , stopStation:: Switch deep Station StationID
+ , stopSequence :: Int
+ }
+
+deriving instance Show (Stop 'Shallow)
+deriving instance Show (Stop 'Deep)
+
+
+data Calendar = Calendar
+ { calServiceId :: Text
+ , calMonday :: Bool
+ , calTuesday :: Bool
+ , calWednesday :: Bool
+ , calThursday :: Bool
+ , calFriday :: Bool
+ , calSaturday :: Bool
+ , calSunday :: Bool
+ , calStartDate :: Day
+ , calEndDate :: Day
+ } deriving Show
+
+data CalendarExceptionType = ServiceAdded | ServiceRemoved
+ deriving (Show, Eq)
+
+data CalendarDate = CalendarDate
+ { caldateServiceId :: Text
+ , caldateDate :: Day
+ , caldateExceptionType :: CalendarExceptionType
+ } deriving Show
+
+type TripID = Text
+type ServiceID = Text
+
+data Trip = Trip
+ { tripRoute :: Text
+ , tripTripID :: TripID
+ , tripHeadsign :: Maybe Text
+ , tripShortName :: Maybe Text
+ , tripDirection :: Maybe Bool
+ -- NOTE: there's also block_id, which we're unlikely to ever need
+ , tripServiceId :: Text
+ -- , tripWheelchairAccessible :: Bool
+ -- , tripBikesAllowed :: Bool
+ , tripShapeId :: Text
+ } deriving Show
+
+-- | helper function to find things in Vectors of things
+tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a
+tableLookup proj key = V.find (\a -> proj a == key)
+
+instance CSV.FromNamedRecord Station where
+ parseNamedRecord r = Station
+ <$> r .: "stop_id"
+ <*> r .: "stop_name"
+ <*> r .: "stop_lat"
+ <*> r .: "stop_lon"
+
+instance CSV.FromNamedRecord (Stop 'Shallow) where
+ parseNamedRecord r = Stop
+ <$> r .: "trip_id"
+ <*> r .: "arrival_time"
+ <*> r .: "departure_time"
+ <*> r .: "stop_id"
+ <*> r .: "stop_sequence"
+
+instance CSV.FromNamedRecord Calendar where
+ parseNamedRecord r = Calendar
+ <$> r .: "service_id"
+ <*> intAsBool' r "monday"
+ <*> intAsBool' r "tuesday"
+ <*> intAsBool' r "wednesday"
+ <*> intAsBool' r "thursday"
+ <*> intAsBool' r "friday"
+ <*> intAsBool' r "saturday"
+ <*> intAsBool' r "sunday"
+ <*> r .: "start_date"
+ <*> r .: "end_date"
+
+intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool)
+intAsBool r field = do
+ int <- r .: field
+ pure $ case int :: Int of
+ 1 -> Just True
+ 0 -> Just False
+ _ -> Nothing
+
+intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool
+intAsBool' r field = intAsBool r field >>= maybe
+ (fail "unexpected value for a boolean.")
+ pure
+
+
+instance CSV.FromNamedRecord CalendarDate where
+ parseNamedRecord r = CalendarDate
+ <$> r .: "service_id"
+ <*> r .: "date"
+ <*> do
+ int <- r .: "exception_type"
+ case int :: Int of
+ 1 -> pure ServiceAdded
+ 2 -> pure ServiceRemoved
+ _ -> fail $ "unexpected value in exception_type: "+|int|+"."
+
+
+instance CSV.FromNamedRecord Trip where
+ parseNamedRecord r = Trip
+ <$> r .: "route_id"
+ <*> r .: "trip_id"
+ <*> r .: "trip_headsign"
+ <*> r .: "trip_short_name"
+ <*> intAsBool r "direction_id"
+ <*> r .: "service_id"
+ -- NOTE: these aren't booleans but triple-values
+ -- <*> intAsBool r "wheelchair_accessible"
+ -- <*> intAsBool r "bikes_allowed"
+ <*> r .: "shape_id"
+
+data GTFS (depth :: Depth) = GTFS
+ { stations :: Vector Station
+ , stops :: Vector (Stop depth)
+ , trips :: Vector Trip
+ , calendar :: Maybe (Vector Calendar)
+ , calendarDates :: Maybe (Vector CalendarDate)
+ }
+
+deriving instance Show (GTFS Shallow)
+deriving instance Show (GTFS Deep)
+
+class Loadable depth where
+ loadGtfs :: FilePath -> IO (GTFS depth)
+
+instance Loadable Shallow where
+ loadGtfs path = do
+ zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip"
+ GTFS <$> decodeTable' "stops.txt" zip
+ <*> decodeTable' "stop_times.txt" zip
+ <*> decodeTable' "trips.txt" zip
+ <*> decodeTable "calendar.txt" zip
+ <*> decodeTable "calendar_dates.txt" zip
+ where
+ decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a))
+ decodeTable path zip = do
+ case Zip.findEntryByPath path zip of
+ Nothing -> pure Nothing
+ Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of
+ Left err -> error "blah"
+ Right (_,v :: a) -> pure (Just v)
+ decodeTable' path zip =
+ decodeTable path zip >>= \case
+ Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip"
+ Just a -> pure a
+
+instance Loadable Deep where
+ loadGtfs path = do
+ shallow <- loadGtfs @Shallow path
+ stops' <- V.mapM (pushStop (stations shallow)) (stops shallow)
+ pure $ shallow { stops = stops' }
+ where
+ pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep)
+ pushStop stations stop = do
+ station <- case tableLookup stationId (stopStation stop) stations of
+ Just a -> pure a
+ Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined."
+ pure $ stop { stopStation = station }
+
+
+
+servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID
+servicesOnDay GTFS{..} day =
+ fmap caldateServiceId added <> V.filter notCancelled regular
+ where (added,removed) = case calendarDates of
+ Nothing -> (mempty,mempty)
+ Just exs ->
+ V.partition (\cd -> caldateExceptionType cd == ServiceAdded)
+ $ V.filter (\cd -> caldateDate cd == day) exs
+ regular = case calendar of
+ Nothing -> mempty
+ Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs
+ where isRunning Calendar{..} =
+ day >= calStartDate &&
+ day <= calEndDate &&
+ case weekday of
+ Monday -> calMonday
+ Tuesday -> calTuesday
+ Wednesday -> calWednesday
+ Thursday -> calThursday
+ Friday -> calFriday
+ Saturday -> calSaturday
+ Sunday -> calSunday
+ weekday = dayOfWeek day
+ notCancelled serviceID =
+ null (tableLookup caldateServiceId serviceID removed)
+
+tripsOfService :: GTFS Deep -> ServiceID -> Vector Trip
+tripsOfService GTFS{..} serviceId =
+ V.filter (\trip -> tripServiceId trip == serviceId ) trips
+
+
+main :: IO ()
+main = do
+ gtfs <- loadGtfs @Deep "./gtfs.zip"
+
+ -- today <- getCurrentTime <&> utctDay
+ -- print (calendar gtfs)
+ let today = Day.fromOrdinalDate 2022 (fromJust $ monthAndDayToDayOfYearValid False 6 6)
+ print today
+
+ putStrLn "trips today:"
+ print (fmap (tripsOfService gtfs) (servicesOnDay gtfs today))
+
+
+{-
+TODO:
+there should be a basic API allowing the questions:
+ - what are the next trips leaving from $station? (or $geolocation?)
+ - all stops of a given tripID
+
+then the "ingress" API:
+ - train ping (location, estimated delay, etc.)
+ - cancel trip
+ - add trip?
+
+-}
diff --git a/haskell-gtfs.cabal b/haskell-gtfs.cabal
new file mode 100644
index 0000000..642c5d9
--- /dev/null
+++ b/haskell-gtfs.cabal
@@ -0,0 +1,44 @@
+cabal-version: 2.4
+name: haskell-gtfs
+version: 0.1.0.0
+
+-- A short (one-line) description of the package.
+-- synopsis:
+
+-- A longer description of the package.
+-- description:
+
+-- A URL where users can report bugs.
+-- bug-reports:
+
+-- The license under which the package is released.
+-- license:
+author: stuebinm
+maintainer: stuebinm@disroot.org
+
+-- A copyright notice.
+-- copyright:
+-- category:
+extra-source-files: CHANGELOG.md
+
+executable haskell-gtfs
+ main-is: Main.hs
+
+ -- Modules included in this executable, other than Main.
+ -- other-modules:
+
+ -- LANGUAGE extensions used by modules in this package.
+ -- other-extensions:
+ build-depends: base ^>=4.14.3.0
+ , zip-archive >= 0.4.2.1
+ , cassava >= 0.5.2.0
+ , bytestring >= 0.10.10.0
+ , vector >= 0.12.3.1
+ , regex-tdfa >= 1.3.1.2
+ , text >= 2.0
+ , fmt >= 0.6.3.0
+ , time >= 1.12.2
+ hs-source-dirs: app
+ default-language: Haskell2010
+ default-extensions: OverloadedStrings
+ , ScopedTypeVariables
diff --git a/hie.yaml b/hie.yaml
new file mode 100644
index 0000000..7cca5eb
--- /dev/null
+++ b/hie.yaml
@@ -0,0 +1,4 @@
+cradle:
+ cabal:
+ - path: "app/Main.hs"
+ component: "haskell-gtfs:exe:haskell-gtfs"
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..e010081
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,5 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+pkgs.mkShell {
+ buildInputs = [ pkgs.zlib pkgs.openssh ];
+}