aboutsummaryrefslogtreecommitdiff
path: root/lib/GTFS.hs
diff options
context:
space:
mode:
Diffstat (limited to 'lib/GTFS.hs')
-rw-r--r--lib/GTFS.hs314
1 files changed, 314 insertions, 0 deletions
diff --git a/lib/GTFS.hs b/lib/GTFS.hs
new file mode 100644
index 0000000..cadc930
--- /dev/null
+++ b/lib/GTFS.hs
@@ -0,0 +1,314 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE StandaloneKindSignatures #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+
+module GTFS where
+
+
+import qualified Data.ByteString.Lazy as LB
+import qualified Data.ByteString as BS
+import qualified Data.Csv as CSV
+import Data.Csv ((.:))
+import qualified Codec.Archive.Zip as Zip
+import qualified Data.Vector as V
+import Data.Vector (Vector)
+import Text.Regex.TDFA ( (=~) )
+import Data.Text (Text)
+import Fmt ( (+|), (|+) )
+import Data.Kind (Type)
+import Data.Maybe (fromMaybe, fromJust)
+import Data.Functor ((<&>))
+import qualified Data.Time.Calendar.OrdinalDate as Day
+import Data.Time.Calendar (Day, DayOfWeek(..))
+import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid)
+import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek)
+import Data.Aeson (ToJSON, FromJSON)
+import qualified Data.Aeson as A
+import GHC.Generics (Generic)
+
+
+newtype Time = Time { toSeconds :: Int }
+ deriving newtype (ToJSON, FromJSON)
+
+instance CSV.FromField Time where
+ parseField f = do
+ text :: String <- CSV.parseField f
+ let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text)
+ :: (String, String, String, [String])
+ case subs of
+ [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss
+ _ -> fail $ "encountered an invalid date: " <> text
+
+instance Show Time where
+ show (Time seconds) = ""
+ +|seconds `div` 3600|+":"
+ +|(seconds `mod` 3600) `div` 60|+":"
+ +|seconds `mod` 60|+""
+
+instance CSV.FromField Day where
+ parseField f = do
+ text :: String <- CSV.parseField f
+ let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text)
+ :: (String, String, String, [String])
+ case subs of
+ [yyyy,mm,dd] -> do
+ let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd)
+ pure $ Day.fromOrdinalDate (read yyyy) dayOfYear
+ _ -> fail $ "invalid date encountered: " <> show f
+
+
+
+data Depth = Shallow | Deep
+type Switch :: Depth -> Type -> Type -> Type
+type family Switch c a b where
+ Switch Deep a b = a
+ Switch Shallow a b = b
+type family Optional c a where
+ Optional Deep a = a
+ Optional Shallow _ = ()
+
+type StationID = Text
+type TripID = Text
+type ServiceID = Text
+
+
+-- | This is what's called a Stop in GTFS
+data Station = Station
+ { stationId :: StationID
+ , stationName :: Text
+ , stationLat :: Float
+ , stationLon :: Float
+ } deriving (Show, Generic, ToJSON)
+
+-- | This is what's called a stop time in GTFS
+data Stop (deep :: Depth) = Stop
+ { stopTrip :: TripID
+ , stopArrival :: Time
+ , stopDeparture :: Time
+ , stopStation:: Switch deep Station StationID
+ , stopSequence :: Int
+ } deriving Generic
+
+deriving instance Show (Stop 'Shallow)
+deriving instance Show (Stop 'Deep)
+deriving instance ToJSON (Stop 'Deep)
+
+
+data Calendar = Calendar
+ { calServiceId :: Text
+ , calMonday :: Bool
+ , calTuesday :: Bool
+ , calWednesday :: Bool
+ , calThursday :: Bool
+ , calFriday :: Bool
+ , calSaturday :: Bool
+ , calSunday :: Bool
+ , calStartDate :: Day
+ , calEndDate :: Day
+ } deriving (Show, Generic, ToJSON)
+
+data CalendarExceptionType = ServiceAdded | ServiceRemoved
+ deriving (Show, Eq, Generic, ToJSON)
+
+data CalendarDate = CalendarDate
+ { caldateServiceId :: Text
+ , caldateDate :: Day
+ , caldateExceptionType :: CalendarExceptionType
+ } deriving (Show, Generic, ToJSON)
+
+data Trip (deep :: Depth) = Trip
+ { tripRoute :: Text
+ , tripTripID :: TripID
+ , tripHeadsign :: Maybe Text
+ , tripShortName :: Maybe Text
+ , tripDirection :: Maybe Bool
+ -- NOTE: there's also block_id, which we're unlikely to ever need
+ , tripServiceId :: Text
+ -- , tripWheelchairAccessible :: Bool
+ -- , tripBikesAllowed :: Bool
+ , tripShapeId :: Text
+ , tripStops :: Optional deep (Vector (Stop deep))
+ } deriving Generic
+
+deriving instance Show (Trip Shallow)
+deriving instance Show (Trip Deep)
+deriving instance ToJSON (Trip Deep)
+
+-- | helper function to find things in Vectors of things
+tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a
+tableLookup proj key = V.find (\a -> proj a == key)
+
+instance CSV.FromNamedRecord Station where
+ parseNamedRecord r = Station
+ <$> r .: "stop_id"
+ <*> r .: "stop_name"
+ <*> r .: "stop_lat"
+ <*> r .: "stop_lon"
+
+instance CSV.FromNamedRecord (Stop 'Shallow) where
+ parseNamedRecord r = Stop
+ <$> r .: "trip_id"
+ <*> r .: "arrival_time"
+ <*> r .: "departure_time"
+ <*> r .: "stop_id"
+ <*> r .: "stop_sequence"
+
+instance CSV.FromNamedRecord Calendar where
+ parseNamedRecord r = Calendar
+ <$> r .: "service_id"
+ <*> intAsBool' r "monday"
+ <*> intAsBool' r "tuesday"
+ <*> intAsBool' r "wednesday"
+ <*> intAsBool' r "thursday"
+ <*> intAsBool' r "friday"
+ <*> intAsBool' r "saturday"
+ <*> intAsBool' r "sunday"
+ <*> r .: "start_date"
+ <*> r .: "end_date"
+
+intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool)
+intAsBool r field = do
+ int <- r .: field
+ pure $ case int :: Int of
+ 1 -> Just True
+ 0 -> Just False
+ _ -> Nothing
+
+intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool
+intAsBool' r field = intAsBool r field >>= maybe
+ (fail "unexpected value for a boolean.")
+ pure
+
+
+instance CSV.FromNamedRecord CalendarDate where
+ parseNamedRecord r = CalendarDate
+ <$> r .: "service_id"
+ <*> r .: "date"
+ <*> do
+ int <- r .: "exception_type"
+ case int :: Int of
+ 1 -> pure ServiceAdded
+ 2 -> pure ServiceRemoved
+ _ -> fail $ "unexpected value in exception_type: "+|int|+"."
+
+
+instance CSV.FromNamedRecord (Trip Shallow) where
+ parseNamedRecord r = Trip
+ <$> r .: "route_id"
+ <*> r .: "trip_id"
+ <*> r .: "trip_headsign"
+ <*> r .: "trip_short_name"
+ <*> intAsBool r "direction_id"
+ <*> r .: "service_id"
+ -- NOTE: these aren't booleans but triple-values
+ -- <*> intAsBool r "wheelchair_accessible"
+ -- <*> intAsBool r "bikes_allowed"
+ <*> r .: "shape_id"
+ <*> pure ()
+
+data GTFS (depth :: Depth) = GTFS
+ { stations :: Vector Station
+ , stops :: Vector (Stop depth)
+ , trips :: Vector (Trip depth)
+ , calendar :: Maybe (Vector Calendar)
+ , calendarDates :: Maybe (Vector CalendarDate)
+ }
+
+deriving instance Show (GTFS Shallow)
+deriving instance Show (GTFS Deep)
+
+class Loadable depth where
+ loadGtfs :: FilePath -> IO (GTFS depth)
+
+instance Loadable Shallow where
+ loadGtfs path = do
+ zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip"
+ GTFS <$> decodeTable' "stops.txt" zip
+ <*> decodeTable' "stop_times.txt" zip
+ <*> decodeTable' "trips.txt" zip
+ <*> decodeTable "calendar.txt" zip
+ <*> decodeTable "calendar_dates.txt" zip
+ where
+ decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a))
+ decodeTable path zip = do
+ case Zip.findEntryByPath path zip of
+ Nothing -> pure Nothing
+ Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of
+ Left err -> error $ "could not decode file "+|path|+": "+|err|+"."
+ Right (_,v :: a) -> pure (Just v)
+ decodeTable' path zip =
+ decodeTable path zip >>= \case
+ Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip"
+ Just a -> pure a
+
+instance Loadable Deep where
+ loadGtfs path = do
+ shallow <- loadGtfs @Shallow path
+ stops' <- V.mapM (pushStop (stations shallow)) (stops shallow)
+ trips' <- V.mapM (pushTrip stops') (trips shallow)
+ pure $ shallow { stops = stops', trips = trips' }
+ where
+ pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep)
+ pushStop stations stop = do
+ station <- case tableLookup stationId (stopStation stop) stations of
+ Just a -> pure a
+ Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined."
+ pure $ stop { stopStation = station }
+ pushTrip :: Vector (Stop Deep) -> Trip Shallow -> IO (Trip Deep)
+ pushTrip stops trip = if V.length alongRoute < 2
+ then fail $ "trip with id "+|tripTripID trip|+" has no stops"
+ else pure $ trip { tripStops = alongRoute }
+ where alongRoute = -- TODO: sort these according to stops
+ V.filter (\s -> stopTrip s == tripTripID trip) stops
+
+
+
+servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID
+servicesOnDay GTFS{..} day =
+ fmap caldateServiceId added <> V.filter notCancelled regular
+ where (added,removed) = case calendarDates of
+ Nothing -> (mempty,mempty)
+ Just exs ->
+ V.partition (\cd -> caldateExceptionType cd == ServiceAdded)
+ $ V.filter (\cd -> caldateDate cd == day) exs
+ regular = case calendar of
+ Nothing -> mempty
+ Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs
+ where isRunning Calendar{..} =
+ day >= calStartDate &&
+ day <= calEndDate &&
+ case weekday of
+ Monday -> calMonday
+ Tuesday -> calTuesday
+ Wednesday -> calWednesday
+ Thursday -> calThursday
+ Friday -> calFriday
+ Saturday -> calSaturday
+ Sunday -> calSunday
+ weekday = dayOfWeek day
+ notCancelled serviceID =
+ null (tableLookup caldateServiceId serviceID removed)
+
+tripsOfService :: GTFS Deep -> ServiceID -> Vector (Trip Deep)
+tripsOfService GTFS{..} serviceId =
+ V.filter (\trip -> tripServiceId trip == serviceId ) trips
+
+-- TODO: this should filter out trips ending there
+tripsAtStation :: GTFS Deep -> StationID -> Vector TripID
+tripsAtStation GTFS{..} at = fmap stopTrip stops
+ where
+ stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops
+
+tripsOnDay :: GTFS Deep -> Day -> Vector (Trip Deep)
+tripsOnDay gtfs today = V.concatMap (tripsOfService gtfs) (servicesOnDay gtfs today)