{-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE StandaloneKindSignatures #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-} module GTFS where import qualified Data.ByteString.Lazy as LB import qualified Data.ByteString as BS import qualified Data.Csv as CSV import Data.Csv ((.:)) import qualified Codec.Archive.Zip as Zip import qualified Data.Vector as V import Data.Vector (Vector) import Text.Regex.TDFA ( (=~) ) import Data.Text (Text) import Fmt ( (+|), (|+) ) import Data.Kind (Type) import Data.Maybe (fromMaybe, fromJust) import Data.Functor ((<&>)) import qualified Data.Time.Calendar.OrdinalDate as Day import Data.Time.Calendar (Day, DayOfWeek(..)) import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek) import Data.Aeson ( ToJSON, FromJSON, Options(fieldLabelModifier), genericParseJSON, genericToJSON, defaultOptions ) import qualified Data.Aeson as A import GHC.Generics (Generic) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) import qualified Data.Text as T import Data.Char (toLower) import Data.Map (Map) import qualified Data.Map as M import Data.Foldable (Foldable(fold)) aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } where fieldModifier n label = case drop n label of c:rest -> toLower c : rest "" -> "" newtype Time = Time { toSeconds :: Int } deriving newtype (ToJSON, FromJSON) instance CSV.FromField Time where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) :: (String, String, String, [String]) case subs of [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss _ -> fail $ "encountered an invalid date: " <> text instance Show Time where show (Time seconds) = "" +|seconds `div` 3600|+":" +|(seconds `mod` 3600) `div` 60|+":" +|seconds `mod` 60|+"" instance CSV.FromField Day where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) :: (String, String, String, [String]) case subs of [yyyy,mm,dd] -> do let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) pure $ Day.fromOrdinalDate (read yyyy) dayOfYear _ -> fail $ "invalid date encountered: " <> show f data Depth = Shallow | Deep type Switch :: Depth -> Type -> Type -> Type type family Switch c a b where Switch Deep a b = a Switch Shallow a b = b type family Optional c a where Optional Deep a = a Optional Shallow _ = () type StationID = Text type TripID = Text type ServiceID = Text -- | This is what's called a Stop in GTFS data Station = Station { stationId :: StationID , stationName :: Text , stationLat :: Float , stationLon :: Float } deriving (Show, Generic) instance FromJSON Station where parseJSON = genericParseJSON (aesonOptions "station") instance ToJSON Station where toJSON = genericToJSON (aesonOptions "station") -- | This is what's called a stop time in GTFS data Stop (deep :: Depth) = Stop { stopTrip :: TripID , stopArrival :: Time , stopDeparture :: Time , stopStation :: Switch deep Station StationID , stopSequence :: Int } deriving Generic deriving instance Show (Stop 'Shallow) deriving instance Show (Stop 'Deep) instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where parseJSON = genericParseJSON (aesonOptions "stop") instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where toJSON = genericToJSON (aesonOptions "stop") data Calendar = Calendar { calServiceId :: Text , calMonday :: Bool , calTuesday :: Bool , calWednesday :: Bool , calThursday :: Bool , calFriday :: Bool , calSaturday :: Bool , calSunday :: Bool , calStartDate :: Day , calEndDate :: Day } deriving (Show, Generic) data CalendarExceptionType = ServiceAdded | ServiceRemoved deriving (Show, Eq, Generic, ToJSON, FromJSON) data CalendarDate = CalendarDate { caldateServiceId :: Text , caldateDate :: Day , caldateExceptionType :: CalendarExceptionType } deriving (Show, Generic) instance FromJSON CalendarDate where parseJSON = genericParseJSON (aesonOptions "caldate") instance ToJSON CalendarDate where toJSON = genericToJSON (aesonOptions "caldate") data Trip (deep :: Depth) = Trip { tripRoute :: Text , tripTripID :: TripID , tripHeadsign :: Maybe Text , tripShortName :: Maybe Text , tripDirection :: Maybe Bool -- NOTE: there's also block_id, which we're unlikely to ever need , tripServiceId :: Text -- , tripWheelchairAccessible :: Bool -- , tripBikesAllowed :: Bool , tripShapeId :: Text , tripStops :: Optional deep (Vector (Stop deep)) } deriving Generic deriving instance Show (Trip Shallow) deriving instance Show (Trip Deep) instance FromJSON (Optional d (Vector (Stop d))) => FromJSON (Trip d) where parseJSON = genericParseJSON (aesonOptions "trip") instance ToJSON (Optional d (Vector (Stop d))) => ToJSON (Trip d) where toJSON = genericToJSON (aesonOptions "trip") -- | helper function to find things in Vectors of things tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a tableLookup proj key = V.find (\a -> proj a == key) instance CSV.FromNamedRecord Station where parseNamedRecord r = Station <$> r .: "stop_id" <*> r .: "stop_name" <*> r .: "stop_lat" <*> r .: "stop_lon" instance CSV.FromNamedRecord (Stop 'Shallow) where parseNamedRecord r = Stop <$> r .: "trip_id" <*> r .: "arrival_time" <*> r .: "departure_time" <*> r .: "stop_id" <*> r .: "stop_sequence" instance CSV.FromNamedRecord Calendar where parseNamedRecord r = Calendar <$> r .: "service_id" <*> intAsBool' r "monday" <*> intAsBool' r "tuesday" <*> intAsBool' r "wednesday" <*> intAsBool' r "thursday" <*> intAsBool' r "friday" <*> intAsBool' r "saturday" <*> intAsBool' r "sunday" <*> r .: "start_date" <*> r .: "end_date" intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) intAsBool r field = do int <- r .: field pure $ case int :: Int of 1 -> Just True 0 -> Just False _ -> Nothing intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool intAsBool' r field = intAsBool r field >>= maybe (fail "unexpected value for a boolean.") pure instance CSV.FromNamedRecord CalendarDate where parseNamedRecord r = CalendarDate <$> r .: "service_id" <*> r .: "date" <*> do int <- r .: "exception_type" case int :: Int of 1 -> pure ServiceAdded 2 -> pure ServiceRemoved _ -> fail $ "unexpected value in exception_type: "+|int|+"." instance CSV.FromNamedRecord (Trip Shallow) where parseNamedRecord r = Trip <$> r .: "route_id" <*> r .: "trip_id" <*> r .: "trip_headsign" <*> r .: "trip_short_name" <*> intAsBool r "direction_id" <*> r .: "service_id" -- NOTE: these aren't booleans but triple-values -- <*> intAsBool r "wheelchair_accessible" -- <*> intAsBool r "bikes_allowed" <*> r .: "shape_id" <*> pure () data RawGTFS = RawGTFS { rawStations :: Vector Station , rawStops :: Vector (Stop Shallow) , rawTrips :: Vector (Trip Shallow) , rawCalendar :: Maybe (Vector Calendar) , rawCalendarDates :: Maybe (Vector CalendarDate) } data GTFS = GTFS { stations :: Map StationID Station , trips :: Map TripID (Trip Deep) , calendar :: Map DayOfWeek (Vector Calendar) , calendarDates :: Map Day (Vector CalendarDate) , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep)) -- ^ a more "fancy" encoding of the calendar? } -- deriving Show loadRawGtfs :: FilePath -> IO RawGTFS loadRawGtfs path = do zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" RawGTFS <$> decodeTable' "stops.txt" zip <*> decodeTable' "stop_times.txt" zip <*> decodeTable' "trips.txt" zip <*> decodeTable "calendar.txt" zip <*> decodeTable "calendar_dates.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) decodeTable path zip = case Zip.findEntryByPath path zip of Nothing -> pure Nothing Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of Left err -> error $ "could not decode file "+|path|+": "+|err|+"." Right (_,v :: a) -> pure (Just v) decodeTable' path zip = decodeTable path zip >>= \case Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" Just a -> pure a loadGtfs :: FilePath -> IO GTFS loadGtfs path = do shallow@RawGTFS{..} <- loadRawGtfs path stops' <- V.mapM (pushStop rawStations) rawStops trips' <- V.mapM (pushTrip stops') rawTrips pure $ GTFS { stations = M.fromList $ (\station -> (stationId station, station)) <$> V.toList rawStations , trips = M.fromList $ (\trip -> (tripTripID trip, trip)) <$> V.toList trips' , calendar = fmap V.fromList $ M.fromListWith (<>) $ concatMap (\cal -> (, [cal]) <$> weekdays cal) $ V.toList (fromMaybe mempty rawCalendar) , calendarDates = fmap V.fromList $ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd])) <$> V.toList (fromMaybe mempty rawCalendarDates) } where weekdays Calendar{..} = if calMonday then [Monday] else [] <> if calTuesday then [Tuesday] else [] <> if calWednesday then [Wednesday] else [] <> if calThursday then [Thursday] else [] <> if calFriday then [Friday] else [] <> if calSaturday then [Saturday] else [] <> [Sunday | calSunday] pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) pushStop stations stop = do station <- case tableLookup stationId (stopStation stop) stations of Just a -> pure a Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." pure $ stop { stopStation = station } pushTrip :: Vector (Stop Deep) -> Trip Shallow -> IO (Trip Deep) pushTrip stops trip = if V.length alongRoute < 2 then fail $ "trip with id "+|tripTripID trip|+" has no stops" else pure $ trip { tripStops = alongRoute } where alongRoute = -- TODO: sort these according to stops V.filter (\s -> stopTrip s == tripTripID trip) stops servicesOnDay :: GTFS -> Day -> Vector ServiceID servicesOnDay GTFS{..} day = fmap caldateServiceId added <> V.filter notCancelled regular where (added,removed) = V.partition (\cd -> caldateExceptionType cd == ServiceAdded) . fromMaybe mempty $ M.lookup day calendarDates regular = maybe mempty (fmap calServiceId) $ M.lookup (dayOfWeek day) calendar notCancelled serviceID = null (tableLookup caldateServiceId serviceID removed) tripsOfService :: GTFS -> ServiceID -> Map TripID (Trip Deep) tripsOfService GTFS{..} serviceId = M.filter (\trip -> tripServiceId trip == serviceId ) trips -- TODO: this should filter out trips ending there tripsAtStation :: GTFS -> StationID -> Vector TripID tripsAtStation GTFS{..} at = fmap stopTrip stops where stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops tripsOnDay :: GTFS -> Day -> Map TripID (Trip Deep) tripsOnDay gtfs today = foldMap (tripsOfService gtfs) (servicesOnDay gtfs today)