diff options
Diffstat (limited to 'lib/GTFS.hs')
-rw-r--r-- | lib/GTFS.hs | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/lib/GTFS.hs b/lib/GTFS.hs new file mode 100644 index 0000000..cadc930 --- /dev/null +++ b/lib/GTFS.hs @@ -0,0 +1,314 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE StandaloneKindSignatures #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + + +module GTFS where + + +import qualified Data.ByteString.Lazy as LB +import qualified Data.ByteString as BS +import qualified Data.Csv as CSV +import Data.Csv ((.:)) +import qualified Codec.Archive.Zip as Zip +import qualified Data.Vector as V +import Data.Vector (Vector) +import Text.Regex.TDFA ( (=~) ) +import Data.Text (Text) +import Fmt ( (+|), (|+) ) +import Data.Kind (Type) +import Data.Maybe (fromMaybe, fromJust) +import Data.Functor ((<&>)) +import qualified Data.Time.Calendar.OrdinalDate as Day +import Data.Time.Calendar (Day, DayOfWeek(..)) +import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) +import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek) +import Data.Aeson (ToJSON, FromJSON) +import qualified Data.Aeson as A +import GHC.Generics (Generic) + + +newtype Time = Time { toSeconds :: Int } + deriving newtype (ToJSON, FromJSON) + +instance CSV.FromField Time where + parseField f = do + text :: String <- CSV.parseField f + let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) + :: (String, String, String, [String]) + case subs of + [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss + _ -> fail $ "encountered an invalid date: " <> text + +instance Show Time where + show (Time seconds) = "" + +|seconds `div` 3600|+":" + +|(seconds `mod` 3600) `div` 60|+":" + +|seconds `mod` 60|+"" + +instance CSV.FromField Day where + parseField f = do + text :: String <- CSV.parseField f + let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) + :: (String, String, String, [String]) + case subs of + [yyyy,mm,dd] -> do + let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) + pure $ Day.fromOrdinalDate (read yyyy) dayOfYear + _ -> fail $ "invalid date encountered: " <> show f + + + +data Depth = Shallow | Deep +type Switch :: Depth -> Type -> Type -> Type +type family Switch c a b where + Switch Deep a b = a + Switch Shallow a b = b +type family Optional c a where + Optional Deep a = a + Optional Shallow _ = () + +type StationID = Text +type TripID = Text +type ServiceID = Text + + +-- | This is what's called a Stop in GTFS +data Station = Station + { stationId :: StationID + , stationName :: Text + , stationLat :: Float + , stationLon :: Float + } deriving (Show, Generic, ToJSON) + +-- | This is what's called a stop time in GTFS +data Stop (deep :: Depth) = Stop + { stopTrip :: TripID + , stopArrival :: Time + , stopDeparture :: Time + , stopStation:: Switch deep Station StationID + , stopSequence :: Int + } deriving Generic + +deriving instance Show (Stop 'Shallow) +deriving instance Show (Stop 'Deep) +deriving instance ToJSON (Stop 'Deep) + + +data Calendar = Calendar + { calServiceId :: Text + , calMonday :: Bool + , calTuesday :: Bool + , calWednesday :: Bool + , calThursday :: Bool + , calFriday :: Bool + , calSaturday :: Bool + , calSunday :: Bool + , calStartDate :: Day + , calEndDate :: Day + } deriving (Show, Generic, ToJSON) + +data CalendarExceptionType = ServiceAdded | ServiceRemoved + deriving (Show, Eq, Generic, ToJSON) + +data CalendarDate = CalendarDate + { caldateServiceId :: Text + , caldateDate :: Day + , caldateExceptionType :: CalendarExceptionType + } deriving (Show, Generic, ToJSON) + +data Trip (deep :: Depth) = Trip + { tripRoute :: Text + , tripTripID :: TripID + , tripHeadsign :: Maybe Text + , tripShortName :: Maybe Text + , tripDirection :: Maybe Bool + -- NOTE: there's also block_id, which we're unlikely to ever need + , tripServiceId :: Text + -- , tripWheelchairAccessible :: Bool + -- , tripBikesAllowed :: Bool + , tripShapeId :: Text + , tripStops :: Optional deep (Vector (Stop deep)) + } deriving Generic + +deriving instance Show (Trip Shallow) +deriving instance Show (Trip Deep) +deriving instance ToJSON (Trip Deep) + +-- | helper function to find things in Vectors of things +tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a +tableLookup proj key = V.find (\a -> proj a == key) + +instance CSV.FromNamedRecord Station where + parseNamedRecord r = Station + <$> r .: "stop_id" + <*> r .: "stop_name" + <*> r .: "stop_lat" + <*> r .: "stop_lon" + +instance CSV.FromNamedRecord (Stop 'Shallow) where + parseNamedRecord r = Stop + <$> r .: "trip_id" + <*> r .: "arrival_time" + <*> r .: "departure_time" + <*> r .: "stop_id" + <*> r .: "stop_sequence" + +instance CSV.FromNamedRecord Calendar where + parseNamedRecord r = Calendar + <$> r .: "service_id" + <*> intAsBool' r "monday" + <*> intAsBool' r "tuesday" + <*> intAsBool' r "wednesday" + <*> intAsBool' r "thursday" + <*> intAsBool' r "friday" + <*> intAsBool' r "saturday" + <*> intAsBool' r "sunday" + <*> r .: "start_date" + <*> r .: "end_date" + +intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) +intAsBool r field = do + int <- r .: field + pure $ case int :: Int of + 1 -> Just True + 0 -> Just False + _ -> Nothing + +intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool +intAsBool' r field = intAsBool r field >>= maybe + (fail "unexpected value for a boolean.") + pure + + +instance CSV.FromNamedRecord CalendarDate where + parseNamedRecord r = CalendarDate + <$> r .: "service_id" + <*> r .: "date" + <*> do + int <- r .: "exception_type" + case int :: Int of + 1 -> pure ServiceAdded + 2 -> pure ServiceRemoved + _ -> fail $ "unexpected value in exception_type: "+|int|+"." + + +instance CSV.FromNamedRecord (Trip Shallow) where + parseNamedRecord r = Trip + <$> r .: "route_id" + <*> r .: "trip_id" + <*> r .: "trip_headsign" + <*> r .: "trip_short_name" + <*> intAsBool r "direction_id" + <*> r .: "service_id" + -- NOTE: these aren't booleans but triple-values + -- <*> intAsBool r "wheelchair_accessible" + -- <*> intAsBool r "bikes_allowed" + <*> r .: "shape_id" + <*> pure () + +data GTFS (depth :: Depth) = GTFS + { stations :: Vector Station + , stops :: Vector (Stop depth) + , trips :: Vector (Trip depth) + , calendar :: Maybe (Vector Calendar) + , calendarDates :: Maybe (Vector CalendarDate) + } + +deriving instance Show (GTFS Shallow) +deriving instance Show (GTFS Deep) + +class Loadable depth where + loadGtfs :: FilePath -> IO (GTFS depth) + +instance Loadable Shallow where + loadGtfs path = do + zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" + GTFS <$> decodeTable' "stops.txt" zip + <*> decodeTable' "stop_times.txt" zip + <*> decodeTable' "trips.txt" zip + <*> decodeTable "calendar.txt" zip + <*> decodeTable "calendar_dates.txt" zip + where + decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) + decodeTable path zip = do + case Zip.findEntryByPath path zip of + Nothing -> pure Nothing + Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of + Left err -> error $ "could not decode file "+|path|+": "+|err|+"." + Right (_,v :: a) -> pure (Just v) + decodeTable' path zip = + decodeTable path zip >>= \case + Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" + Just a -> pure a + +instance Loadable Deep where + loadGtfs path = do + shallow <- loadGtfs @Shallow path + stops' <- V.mapM (pushStop (stations shallow)) (stops shallow) + trips' <- V.mapM (pushTrip stops') (trips shallow) + pure $ shallow { stops = stops', trips = trips' } + where + pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) + pushStop stations stop = do + station <- case tableLookup stationId (stopStation stop) stations of + Just a -> pure a + Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." + pure $ stop { stopStation = station } + pushTrip :: Vector (Stop Deep) -> Trip Shallow -> IO (Trip Deep) + pushTrip stops trip = if V.length alongRoute < 2 + then fail $ "trip with id "+|tripTripID trip|+" has no stops" + else pure $ trip { tripStops = alongRoute } + where alongRoute = -- TODO: sort these according to stops + V.filter (\s -> stopTrip s == tripTripID trip) stops + + + +servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID +servicesOnDay GTFS{..} day = + fmap caldateServiceId added <> V.filter notCancelled regular + where (added,removed) = case calendarDates of + Nothing -> (mempty,mempty) + Just exs -> + V.partition (\cd -> caldateExceptionType cd == ServiceAdded) + $ V.filter (\cd -> caldateDate cd == day) exs + regular = case calendar of + Nothing -> mempty + Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs + where isRunning Calendar{..} = + day >= calStartDate && + day <= calEndDate && + case weekday of + Monday -> calMonday + Tuesday -> calTuesday + Wednesday -> calWednesday + Thursday -> calThursday + Friday -> calFriday + Saturday -> calSaturday + Sunday -> calSunday + weekday = dayOfWeek day + notCancelled serviceID = + null (tableLookup caldateServiceId serviceID removed) + +tripsOfService :: GTFS Deep -> ServiceID -> Vector (Trip Deep) +tripsOfService GTFS{..} serviceId = + V.filter (\trip -> tripServiceId trip == serviceId ) trips + +-- TODO: this should filter out trips ending there +tripsAtStation :: GTFS Deep -> StationID -> Vector TripID +tripsAtStation GTFS{..} at = fmap stopTrip stops + where + stops = V.filter (\(stop :: Stop Deep) -> stationId (stopStation stop) == at) stops + +tripsOnDay :: GTFS Deep -> Day -> Vector (Trip Deep) +tripsOnDay gtfs today = V.concatMap (tripsOfService gtfs) (servicesOnDay gtfs today) |