From b092808a65b16688546b4f4f021a84cc120f8a8a Mon Sep 17 00:00:00 2001 From: stuebinm Date: Mon, 6 Jun 2022 21:51:56 +0200 Subject: restructure GTFS types unfortunately doesn't quite get rid of all the type family still since it's just too useful … but does reduce it somewhat. Also, maps are much easier for looking things up than vectors! --- lib/GTFS.hs | 127 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 52 deletions(-) (limited to 'lib/GTFS.hs') diff --git a/lib/GTFS.hs b/lib/GTFS.hs index 35a85ea..9ad01f1 100644 --- a/lib/GTFS.hs +++ b/lib/GTFS.hs @@ -13,6 +13,11 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} module GTFS where @@ -47,7 +52,9 @@ import GHC.Generics (Generic) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) import qualified Data.Text as T import Data.Char (toLower) - +import Data.Map (Map) +import qualified Data.Map as M +import Data.Foldable (Foldable(fold)) aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } @@ -119,7 +126,7 @@ data Stop (deep :: Depth) = Stop { stopTrip :: TripID , stopArrival :: Time , stopDeparture :: Time - , stopStation:: Switch deep Station StationID + , stopStation :: Switch deep Station StationID , stopSequence :: Int } deriving Generic @@ -253,31 +260,39 @@ instance CSV.FromNamedRecord (Trip Shallow) where <*> r .: "shape_id" <*> pure () -data GTFS (depth :: Depth) = GTFS - { stations :: Vector Station - , stops :: Vector (Stop depth) - , trips :: Vector (Trip depth) - , calendar :: Maybe (Vector Calendar) - , calendarDates :: Maybe (Vector CalendarDate) +data RawGTFS = RawGTFS + { rawStations :: Vector Station + , rawStops :: Vector (Stop Shallow) + , rawTrips :: Vector (Trip Shallow) + , rawCalendar :: Maybe (Vector Calendar) + , rawCalendarDates :: Maybe (Vector CalendarDate) } -deriving instance Show (GTFS Shallow) -deriving instance Show (GTFS Deep) -class Loadable depth where - loadGtfs :: FilePath -> IO (GTFS depth) +data GTFS = GTFS + { stations :: Map StationID Station + , trips :: Map TripID (Trip Deep) + , calendar :: Map DayOfWeek (Vector Calendar) + , calendarDates :: Map Day (Vector CalendarDate) + + , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep)) + -- ^ a more "fancy" encoding of the calendar? + } -- deriving Show + + -instance Loadable Shallow where - loadGtfs path = do +loadRawGtfs :: FilePath -> IO RawGTFS +loadRawGtfs path = do zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" - GTFS <$> decodeTable' "stops.txt" zip - <*> decodeTable' "stop_times.txt" zip - <*> decodeTable' "trips.txt" zip - <*> decodeTable "calendar.txt" zip - <*> decodeTable "calendar_dates.txt" zip + RawGTFS + <$> decodeTable' "stops.txt" zip + <*> decodeTable' "stop_times.txt" zip + <*> decodeTable' "trips.txt" zip + <*> decodeTable "calendar.txt" zip + <*> decodeTable "calendar_dates.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) - decodeTable path zip = do + decodeTable path zip = case Zip.findEntryByPath path zip of Nothing -> pure Nothing Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of @@ -288,13 +303,37 @@ instance Loadable Shallow where Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" Just a -> pure a -instance Loadable Deep where - loadGtfs path = do - shallow <- loadGtfs @Shallow path - stops' <- V.mapM (pushStop (stations shallow)) (stops shallow) - trips' <- V.mapM (pushTrip stops') (trips shallow) - pure $ shallow { stops = stops', trips = trips' } +loadGtfs :: FilePath -> IO GTFS +loadGtfs path = do + shallow@RawGTFS{..} <- loadRawGtfs path + stops' <- V.mapM (pushStop rawStations) rawStops + trips' <- V.mapM (pushTrip stops') rawTrips + pure $ GTFS + { stations = + M.fromList $ (\station -> (stationId station, station)) + <$> V.toList rawStations + , trips = + M.fromList $ (\trip -> (tripTripID trip, trip)) + <$> V.toList trips' + , calendar = + fmap V.fromList + $ M.fromListWith (<>) + $ concatMap (\cal -> (, [cal]) <$> weekdays cal) + $ V.toList (fromMaybe mempty rawCalendar) + , calendarDates = + fmap V.fromList + $ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd])) + <$> V.toList (fromMaybe mempty rawCalendarDates) + } where + weekdays Calendar{..} = + if calMonday then [Monday] else [] + <> if calTuesday then [Tuesday] else [] + <> if calWednesday then [Wednesday] else [] + <> if calThursday then [Thursday] else [] + <> if calFriday then [Friday] else [] + <> if calSaturday then [Saturday] else [] + <> [Sunday | calSunday] pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) pushStop stations stop = do station <- case tableLookup stationId (stopStation stop) stations of @@ -310,41 +349,25 @@ instance Loadable Deep where -servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID +servicesOnDay :: GTFS -> Day -> Vector ServiceID servicesOnDay GTFS{..} day = fmap caldateServiceId added <> V.filter notCancelled regular - where (added,removed) = case calendarDates of - Nothing -> (mempty,mempty) - Just exs -> - V.partition (\cd -> caldateExceptionType cd == ServiceAdded) - $ V.filter (\cd -> caldateDate cd == day) exs - regular = case calendar of - Nothing -> mempty - Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs - where isRunning Calendar{..} = - day >= calStartDate && - day <= calEndDate && - case weekday of - Monday -> calMonday - Tuesday -> calTuesday - Wednesday -> calWednesday - Thursday -> calThursday - Friday -> calFriday - Saturday -> calSaturday - Sunday -> calSunday - weekday = dayOfWeek day + where (added,removed) = + V.partition (\cd -> caldateExceptionType cd == ServiceAdded) + . fromMaybe mempty $ M.lookup day calendarDates + regular = maybe mempty (fmap calServiceId) $ M.lookup (dayOfWeek day) calendar notCancelled serviceID = null (tableLookup caldateServiceId serviceID removed) -tripsOfService :: GTFS Deep -> ServiceID -> Vector (Trip Deep) +tripsOfService :: GTFS -> ServiceID -> Map TripID (Trip Deep) tripsOfService GTFS{..} serviceId = - V.filter (\trip -> tripServiceId trip == serviceId ) trips + M.filter (\trip -> tripServiceId trip == serviceId ) trips -- TODO: this should filter out trips ending there -tripsAtStation :: GTFS Deep -> StationID -> Vector TripID +tripsAtStation :: GTFS -> StationID -> Vector TripID tripsAtStation GTFS{..} at = fmap stopTrip stops where stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops -tripsOnDay :: GTFS Deep -> Day -> Vector (Trip Deep) -tripsOnDay gtfs today = V.concatMap (tripsOfService gtfs) (servicesOnDay gtfs today) +tripsOnDay :: GTFS -> Day -> Map TripID (Trip Deep) +tripsOnDay gtfs today = foldMap (tripsOfService gtfs) (servicesOnDay gtfs today) -- cgit v1.2.3