{-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneKindSignatures #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} module GTFS where import qualified Codec.Archive.Zip as Zip import Data.Aeson (FromJSON, Options (fieldLabelModifier), ToJSON, defaultOptions, genericParseJSON, genericToJSON) import qualified Data.Aeson as A import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LB import Data.Csv ((.:)) import qualified Data.Csv as CSV import Data.Functor ((<&>)) import Data.Kind (Type) import Data.Maybe (fromJust, fromMaybe) import Data.Text (Text) import Data.Time (UTCTime (utctDay), dayOfWeek, getCurrentTime) import Data.Time.Calendar (Day, DayOfWeek (..)) import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) import qualified Data.Time.Calendar.OrdinalDate as Day import Data.Vector (Vector) import qualified Data.Vector as V import Fmt ((+|), (|+)) import GHC.Generics (Generic) import Text.Regex.TDFA ((=~)) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) import Control.Lens import Data.Char (toLower) import Data.Foldable (Foldable (fold)) import Data.Map (Map) import qualified Data.Map as M import Data.Proxy (Proxy (Proxy)) import Data.Swagger (ParamSchema (..), SchemaOptions, ToSchema (declareNamedSchema), defaultSchemaOptions, genericDeclareNamedSchema) import qualified Data.Swagger as S import qualified Data.Text as T aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" swaggerOptions :: Text -> SchemaOptions swaggerOptions prefix = defaultSchemaOptions { S.fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" newtype Time = Time { toSeconds :: Int } deriving newtype (ToJSON, FromJSON) deriving (Generic) instance CSV.FromField Time where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) :: (String, String, String, [String]) case subs of [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss _ -> fail $ "encountered an invalid date: " <> text instance Show Time where show (Time seconds) = "" +|seconds `div` 3600|+":" +|(seconds `mod` 3600) `div` 60|+":" +|seconds `mod` 60|+"" instance CSV.FromField Day where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) :: (String, String, String, [String]) case subs of [yyyy,mm,dd] -> do let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) pure $ Day.fromOrdinalDate (read yyyy) dayOfYear _ -> fail $ "invalid date encountered: " <> show f instance ToSchema Time where declareNamedSchema _ = do dings <- declareNamedSchema (Proxy @Int) pure $ (set (S.schema . S.description) (Just "Zeit in Sekunden seit Tagesanfang") dings) data Depth = Shallow | Deep type Switch :: Depth -> Type -> Type -> Type type family Switch c a b where Switch Deep a b = a Switch Shallow a b = b type family Optional c a where Optional Deep a = a Optional Shallow _ = () type StationID = Text type TripID = Text type ServiceID = Text -- | This is what's called a Stop in GTFS data Station = Station { stationId :: StationID , stationName :: Text , stationLat :: Float , stationLon :: Float } deriving (Show, Generic) instance ToSchema Station where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "station") instance FromJSON Station where parseJSON = genericParseJSON (aesonOptions "station") instance ToJSON Station where toJSON = genericToJSON (aesonOptions "station") -- | This is what's called a stop time in GTFS data Stop (deep :: Depth) = Stop { stopTrip :: TripID , stopArrival :: Time , stopDeparture :: Time , stopStation :: Switch deep Station StationID , stopSequence :: Int } deriving Generic deriving instance Show (Stop 'Shallow) deriving instance Show (Stop 'Deep) instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where parseJSON = genericParseJSON (aesonOptions "stop") instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where toJSON = genericToJSON (aesonOptions "stop") instance ToSchema (Stop Deep)where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "stop") data Calendar = Calendar { calServiceId :: Text , calMonday :: Bool , calTuesday :: Bool , calWednesday :: Bool , calThursday :: Bool , calFriday :: Bool , calSaturday :: Bool , calSunday :: Bool , calStartDate :: Day , calEndDate :: Day } deriving (Show, Generic) data CalendarExceptionType = ServiceAdded | ServiceRemoved deriving (Show, Eq, Generic, ToJSON, FromJSON) data CalendarDate = CalendarDate { caldateServiceId :: Text , caldateDate :: Day , caldateExceptionType :: CalendarExceptionType } deriving (Show, Generic) instance FromJSON CalendarDate where parseJSON = genericParseJSON (aesonOptions "caldate") instance ToJSON CalendarDate where toJSON = genericToJSON (aesonOptions "caldate") data Trip (deep :: Depth) = Trip { tripRoute :: Text , tripTripID :: TripID , tripHeadsign :: Maybe Text , tripShortName :: Maybe Text , tripDirection :: Maybe Bool -- NOTE: there's also block_id, which we're unlikely to ever need , tripServiceId :: Text -- , tripWheelchairAccessible :: Bool -- , tripBikesAllowed :: Bool , tripShapeId :: Text , tripStops :: Optional deep (Vector (Stop deep)) } deriving Generic deriving instance Show (Trip Shallow) deriving instance Show (Trip Deep) instance FromJSON (Optional d (Vector (Stop d))) => FromJSON (Trip d) where parseJSON = genericParseJSON (aesonOptions "trip") instance ToJSON (Optional d (Vector (Stop d))) => ToJSON (Trip d) where toJSON = genericToJSON (aesonOptions "trip") instance ToSchema (Trip Deep) where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "trip") -- | helper function to find things in Vectors of things tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a tableLookup proj key = V.find (\a -> proj a == key) instance CSV.FromNamedRecord Station where parseNamedRecord r = Station <$> r .: "stop_id" <*> r .: "stop_name" <*> r .: "stop_lat" <*> r .: "stop_lon" instance CSV.FromNamedRecord (Stop 'Shallow) where parseNamedRecord r = Stop <$> r .: "trip_id" <*> r .: "arrival_time" <*> r .: "departure_time" <*> r .: "stop_id" <*> r .: "stop_sequence" instance CSV.FromNamedRecord Calendar where parseNamedRecord r = Calendar <$> r .: "service_id" <*> intAsBool' r "monday" <*> intAsBool' r "tuesday" <*> intAsBool' r "wednesday" <*> intAsBool' r "thursday" <*> intAsBool' r "friday" <*> intAsBool' r "saturday" <*> intAsBool' r "sunday" <*> r .: "start_date" <*> r .: "end_date" intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) intAsBool r field = do int <- r .: field pure $ case int :: Int of 1 -> Just True 0 -> Just False _ -> Nothing intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool intAsBool' r field = intAsBool r field >>= maybe (fail "unexpected value for a boolean.") pure instance CSV.FromNamedRecord CalendarDate where parseNamedRecord r = CalendarDate <$> r .: "service_id" <*> r .: "date" <*> do int <- r .: "exception_type" case int :: Int of 1 -> pure ServiceAdded 2 -> pure ServiceRemoved _ -> fail $ "unexpected value in exception_type: "+|int|+"." instance CSV.FromNamedRecord (Trip Shallow) where parseNamedRecord r = Trip <$> r .: "route_id" <*> r .: "trip_id" <*> r .: "trip_headsign" <*> r .: "trip_short_name" <*> intAsBool r "direction_id" <*> r .: "service_id" -- NOTE: these aren't booleans but triple-values -- <*> intAsBool r "wheelchair_accessible" -- <*> intAsBool r "bikes_allowed" <*> r .: "shape_id" <*> pure () data RawGTFS = RawGTFS { rawStations :: Vector Station , rawStops :: Vector (Stop Shallow) , rawTrips :: Vector (Trip Shallow) , rawCalendar :: Maybe (Vector Calendar) , rawCalendarDates :: Maybe (Vector CalendarDate) } data GTFS = GTFS { stations :: Map StationID Station , trips :: Map TripID (Trip Deep) , calendar :: Map DayOfWeek (Vector Calendar) , calendarDates :: Map Day (Vector CalendarDate) , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep)) -- ^ a more "fancy" encoding of the calendar? } -- deriving Show loadRawGtfs :: FilePath -> IO RawGTFS loadRawGtfs path = do zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" RawGTFS <$> decodeTable' "stops.txt" zip <*> decodeTable' "stop_times.txt" zip <*> decodeTable' "trips.txt" zip <*> decodeTable "calendar.txt" zip <*> decodeTable "calendar_dates.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) decodeTable path zip = case Zip.findEntryByPath path zip of Nothing -> pure Nothing Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of Left err -> error $ "could not decode file "+|path|+": "+|err|+"." Right (_,v :: a) -> pure (Just v) decodeTable' path zip = decodeTable path zip >>= \case Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" Just a -> pure a loadGtfs :: FilePath -> IO GTFS loadGtfs path = do shallow@RawGTFS{..} <- loadRawGtfs path stops' <- V.mapM (pushStop rawStations) rawStops trips' <- V.mapM (pushTrip stops') rawTrips pure $ GTFS { stations = M.fromList $ (\station -> (stationId station, station)) <$> V.toList rawStations , trips = M.fromList $ (\trip -> (tripTripID trip, trip)) <$> V.toList trips' , calendar = fmap V.fromList $ M.fromListWith (<>) $ concatMap (\cal -> (, [cal]) <$> weekdays cal) $ V.toList (fromMaybe mempty rawCalendar) , calendarDates = fmap V.fromList $ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd])) <$> V.toList (fromMaybe mempty rawCalendarDates) } where weekdays Calendar{..} = if calMonday then [Monday] else [] <> if calTuesday then [Tuesday] else [] <> if calWednesday then [Wednesday] else [] <> if calThursday then [Thursday] else [] <> if calFriday then [Friday] else [] <> if calSaturday then [Saturday] else [] <> [Sunday | calSunday] pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) pushStop stations stop = do station <- case tableLookup stationId (stopStation stop) stations of Just a -> pure a Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." pure $ stop { stopStation = station } pushTrip :: Vector (Stop Deep) -> Trip Shallow -> IO (Trip Deep) pushTrip stops trip = if V.length alongRoute < 2 then fail $ "trip with id "+|tripTripID trip|+" has no stops" else pure $ trip { tripStops = alongRoute } where alongRoute = -- TODO: sort these according to stops V.filter (\s -> stopTrip s == tripTripID trip) stops servicesOnDay :: GTFS -> Day -> Vector ServiceID servicesOnDay GTFS{..} day = fmap caldateServiceId added <> V.filter notCancelled regular where (added,removed) = V.partition (\cd -> caldateExceptionType cd == ServiceAdded) . fromMaybe mempty $ M.lookup day calendarDates regular = maybe mempty (fmap calServiceId) $ M.lookup (dayOfWeek day) calendar notCancelled serviceID = null (tableLookup caldateServiceId serviceID removed) tripsOfService :: GTFS -> ServiceID -> Map TripID (Trip Deep) tripsOfService GTFS{..} serviceId = M.filter (\trip -> tripServiceId trip == serviceId ) trips -- TODO: this should filter out trips ending there tripsAtStation :: GTFS -> StationID -> Vector TripID tripsAtStation GTFS{..} at = fmap stopTrip stops where stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops tripsOnDay :: GTFS -> Day -> Map TripID (Trip Deep) tripsOnDay gtfs today = foldMap (tripsOfService gtfs) (servicesOnDay gtfs today)