From 1b2e30c134c996e82c282b21099f21501dd966ac Mon Sep 17 00:00:00 2001 From: stuebinm Date: Sun, 28 Aug 2022 17:40:41 +0200 Subject: this does way too much tbh (also functioning delays) most of it deals with timezones, and all the weird implications that has --- lib/GTFS.hs | 313 +++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 228 insertions(+), 85 deletions(-) (limited to 'lib/GTFS.hs') diff --git a/lib/GTFS.hs b/lib/GTFS.hs index bfb1c49..2047d56 100644 --- a/lib/GTFS.hs +++ b/lib/GTFS.hs @@ -19,51 +19,63 @@ -- (i.e. parsing, querying, Aeson instances, etc.) module GTFS where -import qualified Codec.Archive.Zip as Zip -import Control.Monad.ST (runST) -import Data.Aeson (FromJSON, - Options (fieldLabelModifier), - ToJSON, defaultOptions, - genericParseJSON, - genericToJSON) -import qualified Data.Aeson as A -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LB -import Data.Csv ((.:)) -import qualified Data.Csv as CSV -import Data.Function (on) -import Data.Functor ((<&>)) -import Data.Kind (Type) -import Data.Maybe (fromJust, fromMaybe) -import Data.Text (Text) -import Data.Time (UTCTime (utctDay), dayOfWeek, - getCurrentTime) -import Data.Time.Calendar (Day, DayOfWeek (..)) -import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) -import qualified Data.Time.Calendar.OrdinalDate as Day -import Data.Vector (Vector) -import qualified Data.Vector as V -import qualified Data.Vector.Algorithms.Intro as V -import Fmt ((+|), (|+)) -import GHC.Generics (Generic) -import Text.Regex.TDFA ((=~)) -import Web.Internal.FormUrlEncoded (FromForm (..)) -import Web.Internal.HttpApiData (FromHttpApiData (..)) +import qualified Codec.Archive.Zip as Zip +import Control.Monad.ST (runST) +import Data.Aeson (FromJSON, + Options (fieldLabelModifier), + ToJSON, defaultOptions, + genericParseJSON, + genericToJSON) +import qualified Data.Aeson as A +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as LB +import Data.Csv ((.:)) +import qualified Data.Csv as CSV +import Data.Function (on) +import Data.Functor ((<&>)) +import Data.Kind (Type) +import Data.Maybe (fromJust, fromMaybe) +import Data.Text (Text) +import Data.Time (TimeZone (timeZoneMinutes), + UTCTime (utctDay), + dayOfWeek, getCurrentTime) +import Data.Time.Calendar (Day, DayOfWeek (..)) +import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) +import qualified Data.Time.Calendar.OrdinalDate as Day +import Data.Vector (Vector) +import qualified Data.Vector as V +import qualified Data.Vector.Algorithms.Intro as V +import Fmt ((+|), (|+)) +import GHC.Generics (Generic) +import Text.Regex.TDFA ((=~)) +import Web.Internal.FormUrlEncoded (FromForm (..)) +import Web.Internal.HttpApiData (FromHttpApiData (..)) -- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions) +import Control.Applicative ((<|>)) import Control.Lens -import Control.Monad.IO.Class (MonadIO (liftIO)) -import Data.Char (toLower) -import Data.Foldable (Foldable (fold)) -import Data.Map (Map) -import qualified Data.Map as M -import Data.Proxy (Proxy (Proxy)) -import Data.Swagger (ParamSchema (..), - SchemaOptions, - ToSchema (declareNamedSchema), - defaultSchemaOptions, - genericDeclareNamedSchema) -import qualified Data.Swagger as S -import qualified Data.Text as T +import Control.Monad (when) +import Control.Monad.IO.Class (MonadIO (liftIO)) +import Data.ByteString (ByteString) +import Data.Char (toLower) +import Data.Foldable (Foldable (fold)) +import Data.Map (Map) +import qualified Data.Map as M +import Data.Proxy (Proxy (Proxy)) +import Data.Swagger (ParamSchema (..), + SchemaOptions, + ToSchema (declareNamedSchema), + defaultSchemaOptions, + genericDeclareNamedSchema) +import qualified Data.Swagger as S +import qualified Data.Text as T +import Data.Time.LocalTime.TimeZone.Olson (getTimeZoneSeriesFromOlsonFile) +import Data.Time.LocalTime.TimeZone.Series (TimeZoneSeries, + timeZoneFromSeries) + + +-- | for some reason this doesn't exist already in cassava +(.:?) :: forall a. (CSV.FromField (Maybe a)) => CSV.NamedRecord -> ByteString -> CSV.Parser (Maybe a) +(.:?) r field = r .: field <|> pure Nothing aesonOptions prefix = defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) } @@ -78,29 +90,54 @@ swaggerOptions prefix = c:rest -> toLower c : rest "" -> "" -newtype Time = Time { toSeconds :: Int } - deriving newtype (ToJSON, FromJSON) +-- | Times in GTFS are a weird beast, lacking any kinda of timezone information +-- whatsoever, but are given in the timezone of the transport agency, and +-- potentially displayed in a different timezone depending on the station they +-- apply to. +data Time = Time { timeSeconds :: Int, timeTZseries :: TimeZoneSeries, timeTZname :: Text } deriving (Generic) -instance CSV.FromField Time where +instance ToJSON Time where + toJSON (Time seconds _ tzname) = + A.object [ "seconds" A..= seconds, "timezone" A..= tzname ] + +-- | converts a value of Time to seconds since midnight in UTC. Itself needs +-- a UTCTime to resolve timezone changes, and the timezone info contained in +-- the GTFS agency +toSeconds :: Time -> TimeZoneSeries -> UTCTime -> Int +toSeconds (Time seconds _ _) tzseries reftime = + seconds - timeZoneMinutes timezone * 60 + where timezone = timeZoneFromSeries tzseries reftime + +-- | Times in GTFS are given without timezone info, which is handled +-- seperately (as an attribute of the stop / the agency). We attach that information +-- back to the Time, this is just an intermediate step during parsing. +newtype RawTime = RawTime { unRawTime :: TimeZoneSeries -> Text -> Time } + deriving (Generic) + +instance CSV.FromField RawTime where parseField f = do text :: String <- CSV.parseField f let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) :: (String, String, String, [String]) case subs of - [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss + [hh,mm,ss] -> pure $ RawTime $ Time (read hh * 3600 + read mm * 60 + read ss) _ -> fail $ "encountered an invalid date: " <> text instance Show Time where - show (Time seconds) = "" + show (Time seconds _ _) = "" +|pad (seconds `div` 3600)|+":" +|pad ((seconds `mod` 3600) `div` 60)|+ if seconds `mod` 60 /= 0 then ":"+|pad (seconds `mod` 60)|+"" else "" - where pad num = + where + pad num = if length str < 2 then "0"<>str else str where str = show num +instance Show RawTime where + show raw = "[raw time value]" -- show (unRawTime raw 0) + instance CSV.FromField Day where parseField f = do text :: String <- CSV.parseField f @@ -134,10 +171,11 @@ type ServiceID = Text -- | This is what's called a Stop in GTFS data Station = Station - { stationId :: StationID - , stationName :: Text - , stationLat :: Float - , stationLon :: Float + { stationId :: StationID + , stationName :: Text + , stationLat :: Double + , stationLon :: Double + , stationTimezone :: Maybe Text } deriving (Show, Generic) instance ToSchema Station where @@ -147,21 +185,25 @@ instance FromJSON Station where instance ToJSON Station where toJSON = genericToJSON (aesonOptions "station") +stationGeopos :: Station -> (Double, Double) +stationGeopos Station{..} = (stationLat, stationLon) -- | This is what's called a stop time in GTFS data Stop (deep :: Depth) = Stop { stopTrip :: TripID - , stopArrival :: Time - , stopDeparture :: Time + , stopArrival :: Switch deep Time RawTime + , stopDeparture :: Switch deep Time RawTime , stopStation :: Switch deep Station StationID , stopSequence :: Int } deriving Generic -deriving instance Show (Stop 'Shallow) +-- deriving instance Show (Stop 'Shallow) deriving instance Show (Stop 'Deep) -instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where - parseJSON = genericParseJSON (aesonOptions "stop") -instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where +-- instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where +-- parseJSON = genericParseJSON (aesonOptions "stop") +instance (ToJSON (Switch a Station StationID) + , ToJSON (Switch a Time RawTime)) + => ToJSON (Stop a) where toJSON = genericToJSON (aesonOptions "stop") instance ToSchema (Stop Deep)where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "stop") @@ -203,7 +245,7 @@ instance ToJSON CalendarDate where instance FromForm CalendarDate data Trip (deep :: Depth) (shape :: Depth)= Trip - { tripRoute :: Text + { tripRoute :: Switch deep (Route Deep) Text , tripTripID :: TripID , tripHeadsign :: Maybe Text , tripShortName :: Maybe Text @@ -222,9 +264,9 @@ tripForgetShape trip = trip { tripShape = shapeId (tripShape trip) } deriving instance Show (Trip Shallow Shallow) deriving instance Show (Trip Deep Deep) deriving instance Show (Trip Deep Shallow) -instance (FromJSON (Switch d Shape Text), FromJSON (Optional d (Vector (Stop d))), FromJSON (Switch s Shape Text)) => FromJSON (Trip d s) where - parseJSON = genericParseJSON (aesonOptions "trip") -instance (ToJSON (Switch d Shape Text), ToJSON (Optional d (Vector (Stop d))), ToJSON (Switch s Shape Text)) => ToJSON (Trip d s) where +-- instance (FromJSON (Switch d Shape Text), FromJSON (Optional d (Vector (Stop d))), FromJSON (Switch s Shape Text), FromJSON (Switch d (Route 'Deep) Text)) => FromJSON (Trip d s) where +-- parseJSON = genericParseJSON (aesonOptions "trip") +instance (ToJSON (Switch d Shape Text), ToJSON (Optional d (Vector (Stop d))), ToJSON (Switch s Shape Text), ToJSON (Switch d (Route 'Deep) Text)) => ToJSON (Trip d s) where toJSON = genericToJSON (aesonOptions "trip") instance ToSchema (Trip Deep Deep) where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "trip") @@ -254,12 +296,54 @@ instance ToJSON Shape where instance ToSchema Shape where declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "shape") +data Agency depth = Agency + { agencyId :: Maybe Text + , agencyName :: Text + , agencyUrl :: Text + , agencyTimezone :: Switch depth TimeZoneSeries Text + , agencyLang :: Maybe Text + , agencyPhone :: Maybe Text + , agencyFareUrl :: Maybe Text + , agencyEmail :: Maybe Text + } deriving (Generic) + +deriving instance Show (Switch depth TimeZoneSeries Text) => Show (Agency depth) +instance ToJSON (Agency Deep) where + toJSON = genericToJSON (aesonOptions "agency") +instance ToSchema (Agency Deep) where + declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "agency") +instance ToJSON TimeZoneSeries where + toJSON tz = "[timezoneseries thingie]" +instance ToSchema TimeZoneSeries where + declareNamedSchema _ = declareNamedSchema (Proxy @String) + +data Route depth = Route + { routeId :: Text + , routeAgency :: Switch depth (Agency Deep) (Maybe Text) + , routeShortName :: Maybe Text + , routeLongName :: Maybe Text + , routeDescription :: Maybe Text + , routeType :: Int + , routeUrl :: Maybe Text + , routeColor :: Maybe Text + , routeTextColor :: Maybe Text + , routeSortOrder :: Maybe Int + , routeNetworkId :: Maybe Text + } deriving (Generic) + +deriving instance Show (Switch depth (Agency Deep) (Maybe Text)) => Show (Route depth) +instance ToJSON (Route Deep) where + toJSON = genericToJSON (aesonOptions "route") +instance ToSchema (Route Deep) where + declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "route") + instance CSV.FromNamedRecord Station where parseNamedRecord r = Station <$> r .: "stop_id" <*> r .: "stop_name" <*> r .: "stop_lat" <*> r .: "stop_lon" + <*> r .:? "stop_timezone" instance CSV.FromNamedRecord (Stop 'Shallow) where parseNamedRecord r = Stop @@ -329,6 +413,33 @@ instance CSV.FromNamedRecord (Trip Shallow Shallow) where <*> r .: "shape_id" <*> pure () +instance CSV.FromNamedRecord (Agency Shallow) where + parseNamedRecord r = Agency + <$> r .:? "agency_id" + <*> r .: "agency_name" + <*> r .: "agency_url" + <*> r .: "agency_timezone" + <*> r .:? "agency_lang" + <*> r .:? "agency_phone" + <*> r .:? "agency_fare_url" + <*> r .:? "agency_email" + +instance CSV.FromNamedRecord (Route Shallow) where + parseNamedRecord r = Route + <$> r .: "route_id" + <*> r .:? "agency_id" + <*> r .:? "route_short_name" + <*> r .:? "route_long_name" + <*> r .:? "route_desc" + <*> r .: "route_type" + <*> r .:? "route_url" + <*> r .:? "route_color" + <*> r .:? "route_text_color" + <*> r .:? "route_sort_order" + <*> r .:? "network_id" + + + data RawGTFS = RawGTFS { rawStations :: Vector Station , rawStops :: Vector (Stop Shallow) @@ -336,6 +447,8 @@ data RawGTFS = RawGTFS , rawCalendar :: Maybe (Vector Calendar) , rawCalendarDates :: Maybe (Vector CalendarDate) , rawShapePoints :: Maybe (Vector ShapePoint) + , rawAgencies :: Vector (Agency Shallow) + , rawRoutes :: Vector (Route Shallow) } @@ -345,11 +458,11 @@ data GTFS = GTFS , calendar :: Map DayOfWeek (Vector Calendar) , calendarDates :: Map Day (Vector CalendarDate) , shapes :: Map Text Shape - - , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep Deep)) - -- ^ a more "fancy" encoding of the calendar? - } -- deriving Show - + , agencies :: Vector (Agency Deep) + -- ^ not a Map AgencyID Agency since if there's only one agency, it may lack an ID + , routes :: Map Text (Route Deep) + , tzseries :: TimeZoneSeries + } loadRawGtfs :: FilePath -> IO RawGTFS @@ -362,6 +475,8 @@ loadRawGtfs path = do <*> decodeTable "calendar.txt" zip <*> decodeTable "calendar_dates.txt" zip <*> decodeTable "shapes.txt" zip + <*> decodeTable' "agency.txt" zip + <*> decodeTable' "routes.txt" zip where decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) decodeTable path zip = @@ -383,15 +498,17 @@ loadGtfs path = do V.foldr' sortShapePoint mempty $ V.modify (V.sortBy (compare `on` shapePtSequence)) (fromMaybe mempty rawShapePoints) - stops' <- V.mapM (pushStop rawStations) rawStops - trips' <- V.mapM (pushTrip stops' shapes) rawTrips + -- all agencies must have the same timezone, so just take the first's + let tzname = agencyTimezone $ V.head rawAgencies + tzseries <- getTimeZoneSeriesFromOlsonFile (T.unpack $ "/etc/zoneinfo/"<>tzname) + let agencies' = fmap (\a -> a { agencyTimezone = tzseries }) rawAgencies + routes' <- V.mapM (\raw -> pushRoute agencies' raw) rawRoutes + <&> mapFromVector routeId + stops' <- V.mapM (pushStop tzseries tzname rawStations) rawStops + trips' <- V.mapM (pushTrip routes' stops' shapes) rawTrips pure $ GTFS - { stations = - M.fromList $ (\station -> (stationId station, station)) - <$> V.toList rawStations - , trips = - M.fromList $ (\trip -> (tripTripID trip, trip)) - <$> V.toList trips' + { stations = mapFromVector stationId rawStations + , trips = mapFromVector tripTripID trips' , calendar = fmap V.fromList $ M.fromListWith (<>) @@ -402,8 +519,13 @@ loadGtfs path = do $ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd])) <$> V.toList (fromMaybe mempty rawCalendarDates) , shapes + , agencies = agencies' + , routes = routes' + , tzseries } where + mapFromVector :: Ord k => (a -> k) -> Vector a -> Map k a + mapFromVector by v = M.fromList $ fmap (\a -> (by a,a)) $ V.toList v weekdays Calendar{..} = [Monday | calMonday] <> [Tuesday | calTuesday] @@ -412,23 +534,44 @@ loadGtfs path = do <> [Friday | calFriday] <> [Saturday | calSaturday] <> [Sunday | calSunday] - pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) - pushStop stations stop = do + pushStop :: TimeZoneSeries -> Text -> Vector Station -> Stop Shallow -> IO (Stop Deep) + pushStop tzseries tzname stations stop = do station <- case tableLookup stationId (stopStation stop) stations of Just a -> pure a - Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." - pure $ stop { stopStation = station } - pushTrip :: Vector (Stop Deep) -> Map Text Shape -> Trip Shallow Shallow -> IO (Trip Deep Deep) - pushTrip stops shapes trip = if V.length alongRoute < 2 + Nothing -> fail $ "station with id "+|stopStation stop|+" is mentioned but not defined." + (tzseries', tzname') <- case stationTimezone station of + Nothing -> pure (tzseries, tzname) + Just tzname -> do + tzseries <- getTimeZoneSeriesFromOlsonFile (T.unpack $ "/etc/zoneinfo/"<>tzname) + pure (tzseries, tzname) + pure $ stop { stopStation = station + , stopDeparture = unRawTime (stopDeparture stop) tzseries tzname + , stopArrival = unRawTime (stopArrival stop) tzseries tzname } + pushTrip :: Map Text (Route Deep) -> Vector (Stop Deep) -> Map Text Shape -> Trip Shallow Shallow -> IO (Trip Deep Deep) + pushTrip routes stops shapes trip = if V.length alongRoute < 2 then fail $ "trip with id "+|tripTripID trip|+" has no stops" else do - a <- case M.lookup (tripShape trip) shapes of + shape <- case M.lookup (tripShape trip) shapes of Nothing -> fail $ "trip with id "+|tripTripID trip|+" mentions a shape that does not exist." - Just a -> pure a - pure $ trip { tripStops = alongRoute, tripShape = a } + Just shape -> pure shape + route <- case M.lookup (tripRoute trip) routes of + Nothing -> fail $ "trip with id "+|tripTripID trip|+" specifies a route_id which does not exist." + Just route -> pure route + pure $ trip { tripStops = alongRoute, tripShape = shape, tripRoute = route} where alongRoute = V.modify (V.sortBy (compare `on` stopSequence)) $ V.filter (\s -> stopTrip s == tripTripID trip) stops + pushRoute :: Vector (Agency Deep) -> Route Shallow -> IO (Route Deep) + pushRoute agencies route = case routeAgency route of + Nothing -> do + when (V.length agencies /= 1) + $ fail $ "route "+|routeId route|+" has no agency_id specified, but there are multiple agencies specified in agency.txt" + pure $ route {routeAgency = V.head agencies} + Just specifiedId -> case V.headM $ V.filter (\a -> agencyId a == Just specifiedId) agencies of + Nothing -> fail $ "route "+|routeId route|+" specifies an agency_id which does not exist." + Just agency -> pure $ route { routeAgency = agency } + + sortShapePoint :: ShapePoint -> Map Text Shape -> Map Text Shape sortShapePoint ShapePoint{..} shapes = M.alter appendPoint shapePtId shapes where -- cgit v1.2.3