{-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneKindSignatures #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} -- | All kinds of stuff that has to deal with GTFS directly -- (i.e. parsing, querying, Aeson instances, etc.) module GTFS where import qualified Codec.Archive.Zip as Zip import Control.Monad.ST (runST) import Data.Aeson (FromJSON, Options (fieldLabelModifier), ToJSON, defaultOptions, genericParseJSON, genericToJSON) import qualified Data.Aeson as A import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LB import Data.Csv ((.:)) import qualified Data.Csv as CSV import Data.Function (on) import Data.Functor ((<&>)) import Data.Kind (Type) import Data.Maybe (fromJust, fromMaybe) import Data.Text (Text) import Data.Time (UTCTime (utctDay), dayOfWeek, getCurrentTime) import Data.Time.Calendar (Day, DayOfWeek (..)) import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) import qualified Data.Time.Calendar.OrdinalDate as Day import Data.Vector (Vector) import qualified Data.Vector as V import qualified Data.Vector.Algorithms.Intro as V import Fmt ((+|), (|+)) import GHC.Generics (Generic) import Text.Regex.TDFA ((=~)) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) import Control.Lens import Data.Char (toLower) import Data.Foldable (Foldable (fold)) import Data.Map (Map) import qualified Data.Map as M import Data.Proxy (Proxy (Proxy)) import Data.Swagger (ParamSchema (..), SchemaOptions, ToSchema (declareNamedSchema), defaultSchemaOptions, genericDeclareNamedSchema) import qualified Data.Swagger as S import qualified Data.Text as T aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" swaggerOptions :: Text -> SchemaOptions swaggerOptions prefix = defaultSchemaOptions { S.fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" newtype Time = Time { toSeconds :: Int } deriving newtype (ToJSON, FromJSON) deriving (Generic) instance CSV.FromField Time where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) :: (String, String, String, [String]) case subs of [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss _ -> fail $ "encountered an invalid date: " <> text instance Show Time where show (Time seconds) = "" +|seconds `div` 3600|+":" +|(seconds `mod` 3600) `div` 60|+":" +|seconds `mod` 60|+"" instance CSV.FromField Day where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) :: (String, String, String, [String]) case subs of [yyyy,mm,dd] -> do let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) pure $ Day.fromOrdinalDate (read yyyy) dayOfYear _ -> fail $ "invalid date encountered: " <> show f instance ToSchema Time where declareNamedSchema _ = do dings <- declareNamedSchema (Proxy @Int) pure (set (S.schema . S.description) (Just "Zeit in Sekunden seit Tagesanfang") dings) data Depth = Shallow | Deep type Switch :: Depth -> Type -> Type -> Type type family Switch c a b where Switch Deep a b = a Switch Shallow a b = b type family Optional c a where Optional Deep a = a Optional Shallow _ = () type StationID = Text type TripID = Text type ServiceID = Text -- | This is what's called a Stop in GTFS data Station = Station { stationId :: StationID , stationName :: Text , stationLat :: Float , stationLon :: Float } deriving (Show, Generic) instance ToSchema Station where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "station") instance FromJSON Station where parseJSON = genericParseJSON (aesonOptions "station") instance ToJSON Station where toJSON = genericToJSON (aesonOptions "station") -- | This is what's called a stop time in GTFS data Stop (deep :: Depth) = Stop { stopTrip :: TripID , stopArrival :: Time , stopDeparture :: Time , stopStation :: Switch deep Station StationID , stopSequence :: Int } deriving Generic deriving instance Show (Stop 'Shallow) deriving instance Show (Stop 'Deep) instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where parseJSON = genericParseJSON (aesonOptions "stop") instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where toJSON = genericToJSON (aesonOptions "stop") instance ToSchema (Stop Deep)where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "stop") data Calendar = Calendar { calServiceId :: Text , calMonday :: Bool , calTuesday :: Bool , calWednesday :: Bool , calThursday :: Bool , calFriday :: Bool , calSaturday :: Bool , calSunday :: Bool , calStartDate :: Day , calEndDate :: Day } deriving (Show, Generic) data CalendarExceptionType = ServiceAdded | ServiceRemoved deriving (Show, Eq, Generic, ToJSON, FromJSON) data CalendarDate = CalendarDate { caldateServiceId :: Text , caldateDate :: Day , caldateExceptionType :: CalendarExceptionType } deriving (Show, Generic) instance FromJSON CalendarDate where parseJSON = genericParseJSON (aesonOptions "caldate") instance ToJSON CalendarDate where toJSON = genericToJSON (aesonOptions "caldate") data Trip (deep :: Depth) = Trip { tripRoute :: Text , tripTripID :: TripID , tripHeadsign :: Maybe Text , tripShortName :: Maybe Text , tripDirection :: Maybe Bool -- NOTE: there's also block_id, which we're unlikely to ever need , tripServiceId :: Text -- , tripWheelchairAccessible :: Bool -- , tripBikesAllowed :: Bool , tripShape :: Switch deep Shape Text , tripStops :: Optional deep (Vector (Stop deep)) } deriving Generic deriving instance Show (Trip Shallow) deriving instance Show (Trip Deep) instance (FromJSON (Switch d Shape Text), FromJSON (Optional d (Vector (Stop d)))) => FromJSON (Trip d) where parseJSON = genericParseJSON (aesonOptions "trip") instance (ToJSON (Switch d Shape Text), ToJSON (Optional d (Vector (Stop d)))) => ToJSON (Trip d) where toJSON = genericToJSON (aesonOptions "trip") instance ToSchema (Trip Deep) where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "trip") -- | helper function to find things in Vectors of things tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a tableLookup proj key = V.find (\a -> proj a == key) data ShapePoint = ShapePoint { shapePtId :: Text , shapePtLat :: Double , shapePtLong :: Double , shapePtSequence :: Int } deriving Generic data Shape = Shape { shapeId :: Text , shapePoints :: Vector (Double,Double) } deriving (Generic, Show) instance FromJSON Shape where parseJSON = genericParseJSON (aesonOptions "shape") instance ToJSON Shape where toJSON = genericToJSON (aesonOptions "shape") instance ToSchema Shape where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "shape") instance CSV.FromNamedRecord Station where parseNamedRecord r = Station <$> r .: "stop_id" <*> r .: "stop_name" <*> r .: "stop_lat" <*> r .: "stop_lon" instance CSV.FromNamedRecord (Stop 'Shallow) where parseNamedRecord r = Stop <$> r .: "trip_id" <*> r .: "arrival_time" <*> r .: "departure_time" <*> r .: "stop_id" <*> r .: "stop_sequence" instance CSV.FromNamedRecord Calendar where parseNamedRecord r = Calendar <$> r .: "service_id" <*> intAsBool' r "monday" <*> intAsBool' r "tuesday" <*> intAsBool' r "wednesday" <*> intAsBool' r "thursday" <*> intAsBool' r "friday" <*> intAsBool' r "saturday" <*> intAsBool' r "sunday" <*> r .: "start_date" <*> r .: "end_date" instance CSV.FromNamedRecord ShapePoint where parseNamedRecord r = ShapePoint <$> r .: "shape_id" <*> r .: "shape_pt_lat" <*> r .: "shape_pt_lon" <*> r .: "shape_pt_sequence" intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) intAsBool r field = do int <- r .: field pure $ case int :: Int of 1 -> Just True 0 -> Just False _ -> Nothing intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool intAsBool' r field = intAsBool r field >>= maybe (fail "unexpected value for a boolean.") pure instance CSV.FromNamedRecord CalendarDate where parseNamedRecord r = CalendarDate <$> r .: "service_id" <*> r .: "date" <*> do int <- r .: "exception_type" case int :: Int of 1 -> pure ServiceAdded 2 -> pure ServiceRemoved _ -> fail $ "unexpected value in exception_type: "+|int|+"." instance CSV.FromNamedRecord (Trip Shallow) where parseNamedRecord r = Trip <$> r .: "route_id" <*> r .: "trip_id" <*> r .: "trip_headsign" <*> r .: "trip_short_name" <*> intAsBool r "direction_id" <*> r .: "service_id" -- NOTE: these aren't booleans but triple-values -- <*> intAsBool r "wheelchair_accessible" -- <*> intAsBool r "bikes_allowed" <*> r .: "shape_id" <*> pure () data RawGTFS = RawGTFS { rawStations :: Vector Station , rawStops :: Vector (Stop Shallow) , rawTrips :: Vector (Trip Shallow) , rawCalendar :: Maybe (Vector Calendar) , rawCalendarDates :: Maybe (Vector CalendarDate) , rawShapePoints :: Maybe (Vector ShapePoint) } data GTFS = GTFS { stations :: Map StationID Station , trips :: Map TripID (Trip Deep) , calendar :: Map DayOfWeek (Vector Calendar) , calendarDates :: Map Day (Vector CalendarDate) , shapes :: Map Text Shape , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep)) -- ^ a more "fancy" encoding of the calendar? } -- deriving Show loadRawGtfs :: FilePath -> IO RawGTFS loadRawGtfs path = do zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" RawGTFS <$> decodeTable' "stops.txt" zip <*> decodeTable' "stop_times.txt" zip <*> decodeTable' "trips.txt" zip <*> decodeTable "calendar.txt" zip <*> decodeTable "calendar_dates.txt" zip <*> decodeTable "shapes.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) decodeTable path zip = case Zip.findEntryByPath path zip of Nothing -> pure Nothing Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of Left err -> error $ "could not decode file "+|path|+": "+|err|+"." Right (_,v :: a) -> pure (Just v) decodeTable' path zip = decodeTable path zip >>= \case Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" Just a -> pure a loadGtfs :: FilePath -> IO GTFS loadGtfs path = do shallow@RawGTFS{..} <- loadRawGtfs path -- TODO: sort these according to sequence numbers let shapes = V.foldr' sortShapePoint mempty $ V.modify (V.sortBy (compare `on` shapePtSequence)) (fromMaybe mempty rawShapePoints) stops' <- V.mapM (pushStop rawStations) rawStops trips' <- V.mapM (pushTrip stops' shapes) rawTrips pure $ GTFS { stations = M.fromList $ (\station -> (stationId station, station)) <$> V.toList rawStations , trips = M.fromList $ (\trip -> (tripTripID trip, trip)) <$> V.toList trips' , calendar = fmap V.fromList $ M.fromListWith (<>) $ concatMap (\cal -> (, [cal]) <$> weekdays cal) $ V.toList (fromMaybe mempty rawCalendar) , calendarDates = fmap V.fromList $ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd])) <$> V.toList (fromMaybe mempty rawCalendarDates) , shapes } where weekdays Calendar{..} = if calMonday then [Monday] else [] <> if calTuesday then [Tuesday] else [] <> if calWednesday then [Wednesday] else [] <> if calThursday then [Thursday] else [] <> if calFriday then [Friday] else [] <> if calSaturday then [Saturday] else [] <> [Sunday | calSunday] pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) pushStop stations stop = do station <- case tableLookup stationId (stopStation stop) stations of Just a -> pure a Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." pure $ stop { stopStation = station } pushTrip :: Vector (Stop Deep) -> Map Text Shape -> Trip Shallow -> IO (Trip Deep) pushTrip stops shapes trip = if V.length alongRoute < 2 then fail $ "trip with id "+|tripTripID trip|+" has no stops" else do a <- case M.lookup (tripShape trip) shapes of Nothing -> fail $ "trip with id "+|tripTripID trip|+" mentions a shape that does not exist." Just a -> pure a pure $ trip { tripStops = alongRoute, tripShape = a } where alongRoute = V.modify (V.sortBy (compare `on` stopSequence)) $ V.filter (\s -> stopTrip s == tripTripID trip) stops sortShapePoint :: ShapePoint -> Map Text Shape -> Map Text Shape sortShapePoint ShapePoint{..} shapes = M.alter appendPoint shapePtId shapes where point = (shapePtLat, shapePtLong) appendPoint = \case Just shape -> Just $ shape { shapePoints = V.cons point (shapePoints shape) } Nothing -> Just $ Shape { shapeId = shapePtId, shapePoints = V.singleton point } servicesOnDay :: GTFS -> Day -> Vector ServiceID servicesOnDay GTFS{..} day = fmap caldateServiceId added <> V.filter notCancelled regular where (added,removed) = V.partition (\cd -> caldateExceptionType cd == ServiceAdded) . fromMaybe mempty $ M.lookup day calendarDates regular = maybe mempty (fmap calServiceId) $ M.lookup (dayOfWeek day) calendar notCancelled serviceID = null (tableLookup caldateServiceId serviceID removed) tripsOfService :: GTFS -> ServiceID -> Map TripID (Trip Deep) tripsOfService GTFS{..} serviceId = M.filter (\trip -> tripServiceId trip == serviceId ) trips -- TODO: this should filter out trips ending there tripsAtStation :: GTFS -> StationID -> Vector TripID tripsAtStation GTFS{..} at = fmap stopTrip stops where stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops tripsOnDay :: GTFS -> Day -> Map TripID (Trip Deep) tripsOnDay gtfs today = foldMap (tripsOfService gtfs) (servicesOnDay gtfs today)