{-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE StandaloneKindSignatures #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} module GTFS where import qualified Data.ByteString.Lazy as LB import qualified Data.ByteString as BS import qualified Data.Csv as CSV import Data.Csv ((.:)) import qualified Codec.Archive.Zip as Zip import qualified Data.Vector as V import Data.Vector (Vector) import Text.Regex.TDFA ( (=~) ) import Data.Text (Text) import Fmt ( (+|), (|+) ) import Data.Kind (Type) import Data.Maybe (fromMaybe, fromJust) import Data.Functor ((<&>)) import qualified Data.Time.Calendar.OrdinalDate as Day import Data.Time.Calendar (Day, DayOfWeek(..)) import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek) import Data.Aeson ( ToJSON, FromJSON, Options(fieldLabelModifier), genericParseJSON, genericToJSON, defaultOptions ) import qualified Data.Aeson as A import GHC.Generics (Generic) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) import qualified Data.Text as T import Data.Char (toLower) aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" newtype Time = Time { toSeconds :: Int } deriving newtype (ToJSON, FromJSON) instance CSV.FromField Time where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) :: (String, String, String, [String]) case subs of [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss _ -> fail $ "encountered an invalid date: " <> text instance Show Time where show (Time seconds) = "" +|seconds `div` 3600|+":" +|(seconds `mod` 3600) `div` 60|+":" +|seconds `mod` 60|+"" instance CSV.FromField Day where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) :: (String, String, String, [String]) case subs of [yyyy,mm,dd] -> do let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) pure $ Day.fromOrdinalDate (read yyyy) dayOfYear _ -> fail $ "invalid date encountered: " <> show f data Depth = Shallow | Deep type Switch :: Depth -> Type -> Type -> Type type family Switch c a b where Switch Deep a b = a Switch Shallow a b = b type family Optional c a where Optional Deep a = a Optional Shallow _ = () type StationID = Text type TripID = Text type ServiceID = Text -- | This is what's called a Stop in GTFS data Station = Station { stationId :: StationID , stationName :: Text , stationLat :: Float , stationLon :: Float } deriving (Show, Generic) instance FromJSON Station where parseJSON = genericParseJSON (aesonOptions "station") instance ToJSON Station where toJSON = genericToJSON (aesonOptions "station") -- | This is what's called a stop time in GTFS data Stop (deep :: Depth) = Stop { stopTrip :: TripID , stopArrival :: Time , stopDeparture :: Time , stopStation:: Switch deep Station StationID , stopSequence :: Int } deriving Generic deriving instance Show (Stop 'Shallow) deriving instance Show (Stop 'Deep) instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where parseJSON = genericParseJSON (aesonOptions "stop") instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where toJSON = genericToJSON (aesonOptions "stop") data Calendar = Calendar { calServiceId :: Text , calMonday :: Bool , calTuesday :: Bool , calWednesday :: Bool , calThursday :: Bool , calFriday :: Bool , calSaturday :: Bool , calSunday :: Bool , calStartDate :: Day , calEndDate :: Day } deriving (Show, Generic) data CalendarExceptionType = ServiceAdded | ServiceRemoved deriving (Show, Eq, Generic, ToJSON, FromJSON) data CalendarDate = CalendarDate { caldateServiceId :: Text , caldateDate :: Day , caldateExceptionType :: CalendarExceptionType } deriving (Show, Generic) instance FromJSON CalendarDate where parseJSON = genericParseJSON (aesonOptions "caldate") instance ToJSON CalendarDate where toJSON = genericToJSON (aesonOptions "caldate") data Trip (deep :: Depth) = Trip { tripRoute :: Text , tripTripID :: TripID , tripHeadsign :: Maybe Text , tripShortName :: Maybe Text , tripDirection :: Maybe Bool -- NOTE: there's also block_id, which we're unlikely to ever need , tripServiceId :: Text -- , tripWheelchairAccessible :: Bool -- , tripBikesAllowed :: Bool , tripShapeId :: Text , tripStops :: Optional deep (Vector (Stop deep)) } deriving Generic deriving instance Show (Trip Shallow) deriving instance Show (Trip Deep) instance FromJSON (Optional d (Vector (Stop d))) => FromJSON (Trip d) where parseJSON = genericParseJSON (aesonOptions "trip") instance ToJSON (Optional d (Vector (Stop d))) => ToJSON (Trip d) where toJSON = genericToJSON (aesonOptions "trip") -- | helper function to find things in Vectors of things tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a tableLookup proj key = V.find (\a -> proj a == key) instance CSV.FromNamedRecord Station where parseNamedRecord r = Station <$> r .: "stop_id" <*> r .: "stop_name" <*> r .: "stop_lat" <*> r .: "stop_lon" instance CSV.FromNamedRecord (Stop 'Shallow) where parseNamedRecord r = Stop <$> r .: "trip_id" <*> r .: "arrival_time" <*> r .: "departure_time" <*> r .: "stop_id" <*> r .: "stop_sequence" instance CSV.FromNamedRecord Calendar where parseNamedRecord r = Calendar <$> r .: "service_id" <*> intAsBool' r "monday" <*> intAsBool' r "tuesday" <*> intAsBool' r "wednesday" <*> intAsBool' r "thursday" <*> intAsBool' r "friday" <*> intAsBool' r "saturday" <*> intAsBool' r "sunday" <*> r .: "start_date" <*> r .: "end_date" intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) intAsBool r field = do int <- r .: field pure $ case int :: Int of 1 -> Just True 0 -> Just False _ -> Nothing intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool intAsBool' r field = intAsBool r field >>= maybe (fail "unexpected value for a boolean.") pure instance CSV.FromNamedRecord CalendarDate where parseNamedRecord r = CalendarDate <$> r .: "service_id" <*> r .: "date" <*> do int <- r .: "exception_type" case int :: Int of 1 -> pure ServiceAdded 2 -> pure ServiceRemoved _ -> fail $ "unexpected value in exception_type: "+|int|+"." instance CSV.FromNamedRecord (Trip Shallow) where parseNamedRecord r = Trip <$> r .: "route_id" <*> r .: "trip_id" <*> r .: "trip_headsign" <*> r .: "trip_short_name" <*> intAsBool r "direction_id" <*> r .: "service_id" -- NOTE: these aren't booleans but triple-values -- <*> intAsBool r "wheelchair_accessible" -- <*> intAsBool r "bikes_allowed" <*> r .: "shape_id" <*> pure () data GTFS (depth :: Depth) = GTFS { stations :: Vector Station , stops :: Vector (Stop depth) , trips :: Vector (Trip depth) , calendar :: Maybe (Vector Calendar) , calendarDates :: Maybe (Vector CalendarDate) } deriving instance Show (GTFS Shallow) deriving instance Show (GTFS Deep) class Loadable depth where loadGtfs :: FilePath -> IO (GTFS depth) instance Loadable Shallow where loadGtfs path = do zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" GTFS <$> decodeTable' "stops.txt" zip <*> decodeTable' "stop_times.txt" zip <*> decodeTable' "trips.txt" zip <*> decodeTable "calendar.txt" zip <*> decodeTable "calendar_dates.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) decodeTable path zip = do case Zip.findEntryByPath path zip of Nothing -> pure Nothing Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of Left err -> error $ "could not decode file "+|path|+": "+|err|+"." Right (_,v :: a) -> pure (Just v) decodeTable' path zip = decodeTable path zip >>= \case Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" Just a -> pure a instance Loadable Deep where loadGtfs path = do shallow <- loadGtfs @Shallow path stops' <- V.mapM (pushStop (stations shallow)) (stops shallow) trips' <- V.mapM (pushTrip stops') (trips shallow) pure $ shallow { stops = stops', trips = trips' } where pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) pushStop stations stop = do station <- case tableLookup stationId (stopStation stop) stations of Just a -> pure a Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." pure $ stop { stopStation = station } pushTrip :: Vector (Stop Deep) -> Trip Shallow -> IO (Trip Deep) pushTrip stops trip = if V.length alongRoute < 2 then fail $ "trip with id "+|tripTripID trip|+" has no stops" else pure $ trip { tripStops = alongRoute } where alongRoute = -- TODO: sort these according to stops V.filter (\s -> stopTrip s == tripTripID trip) stops servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID servicesOnDay GTFS{..} day = fmap caldateServiceId added <> V.filter notCancelled regular where (added,removed) = case calendarDates of Nothing -> (mempty,mempty) Just exs -> V.partition (\cd -> caldateExceptionType cd == ServiceAdded) $ V.filter (\cd -> caldateDate cd == day) exs regular = case calendar of Nothing -> mempty Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs where isRunning Calendar{..} = day >= calStartDate && day <= calEndDate && case weekday of Monday -> calMonday Tuesday -> calTuesday Wednesday -> calWednesday Thursday -> calThursday Friday -> calFriday Saturday -> calSaturday Sunday -> calSunday weekday = dayOfWeek day notCancelled serviceID = null (tableLookup caldateServiceId serviceID removed) tripsOfService :: GTFS Deep -> ServiceID -> Vector (Trip Deep) tripsOfService GTFS{..} serviceId = V.filter (\trip -> tripServiceId trip == serviceId ) trips -- TODO: this should filter out trips ending there tripsAtStation :: GTFS Deep -> StationID -> Vector TripID tripsAtStation GTFS{..} at = fmap stopTrip stops where stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops tripsOnDay :: GTFS Deep -> Day -> Vector (Trip Deep) tripsOnDay gtfs today = V.concatMap (tripsOfService gtfs) (servicesOnDay gtfs today)