aboutsummaryrefslogtreecommitdiff
path: root/lib/GTFS.hs
diff options
context:
space:
mode:
Diffstat (limited to 'lib/GTFS.hs')
-rw-r--r--lib/GTFS.hs313
1 files changed, 228 insertions, 85 deletions
diff --git a/lib/GTFS.hs b/lib/GTFS.hs
index bfb1c49..2047d56 100644
--- a/lib/GTFS.hs
+++ b/lib/GTFS.hs
@@ -19,51 +19,63 @@
-- (i.e. parsing, querying, Aeson instances, etc.)
module GTFS where
-import qualified Codec.Archive.Zip as Zip
-import Control.Monad.ST (runST)
-import Data.Aeson (FromJSON,
- Options (fieldLabelModifier),
- ToJSON, defaultOptions,
- genericParseJSON,
- genericToJSON)
-import qualified Data.Aeson as A
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Lazy as LB
-import Data.Csv ((.:))
-import qualified Data.Csv as CSV
-import Data.Function (on)
-import Data.Functor ((<&>))
-import Data.Kind (Type)
-import Data.Maybe (fromJust, fromMaybe)
-import Data.Text (Text)
-import Data.Time (UTCTime (utctDay), dayOfWeek,
- getCurrentTime)
-import Data.Time.Calendar (Day, DayOfWeek (..))
-import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid)
-import qualified Data.Time.Calendar.OrdinalDate as Day
-import Data.Vector (Vector)
-import qualified Data.Vector as V
-import qualified Data.Vector.Algorithms.Intro as V
-import Fmt ((+|), (|+))
-import GHC.Generics (Generic)
-import Text.Regex.TDFA ((=~))
-import Web.Internal.FormUrlEncoded (FromForm (..))
-import Web.Internal.HttpApiData (FromHttpApiData (..))
+import qualified Codec.Archive.Zip as Zip
+import Control.Monad.ST (runST)
+import Data.Aeson (FromJSON,
+ Options (fieldLabelModifier),
+ ToJSON, defaultOptions,
+ genericParseJSON,
+ genericToJSON)
+import qualified Data.Aeson as A
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as LB
+import Data.Csv ((.:))
+import qualified Data.Csv as CSV
+import Data.Function (on)
+import Data.Functor ((<&>))
+import Data.Kind (Type)
+import Data.Maybe (fromJust, fromMaybe)
+import Data.Text (Text)
+import Data.Time (TimeZone (timeZoneMinutes),
+ UTCTime (utctDay),
+ dayOfWeek, getCurrentTime)
+import Data.Time.Calendar (Day, DayOfWeek (..))
+import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid)
+import qualified Data.Time.Calendar.OrdinalDate as Day
+import Data.Vector (Vector)
+import qualified Data.Vector as V
+import qualified Data.Vector.Algorithms.Intro as V
+import Fmt ((+|), (|+))
+import GHC.Generics (Generic)
+import Text.Regex.TDFA ((=~))
+import Web.Internal.FormUrlEncoded (FromForm (..))
+import Web.Internal.HttpApiData (FromHttpApiData (..))
-- import Data.Aeson.Generic (Options(fieldLabelModifier), deriveJSON, defaultOptions)
+import Control.Applicative ((<|>))
import Control.Lens
-import Control.Monad.IO.Class (MonadIO (liftIO))
-import Data.Char (toLower)
-import Data.Foldable (Foldable (fold))
-import Data.Map (Map)
-import qualified Data.Map as M
-import Data.Proxy (Proxy (Proxy))
-import Data.Swagger (ParamSchema (..),
- SchemaOptions,
- ToSchema (declareNamedSchema),
- defaultSchemaOptions,
- genericDeclareNamedSchema)
-import qualified Data.Swagger as S
-import qualified Data.Text as T
+import Control.Monad (when)
+import Control.Monad.IO.Class (MonadIO (liftIO))
+import Data.ByteString (ByteString)
+import Data.Char (toLower)
+import Data.Foldable (Foldable (fold))
+import Data.Map (Map)
+import qualified Data.Map as M
+import Data.Proxy (Proxy (Proxy))
+import Data.Swagger (ParamSchema (..),
+ SchemaOptions,
+ ToSchema (declareNamedSchema),
+ defaultSchemaOptions,
+ genericDeclareNamedSchema)
+import qualified Data.Swagger as S
+import qualified Data.Text as T
+import Data.Time.LocalTime.TimeZone.Olson (getTimeZoneSeriesFromOlsonFile)
+import Data.Time.LocalTime.TimeZone.Series (TimeZoneSeries,
+ timeZoneFromSeries)
+
+
+-- | for some reason this doesn't exist already in cassava
+(.:?) :: forall a. (CSV.FromField (Maybe a)) => CSV.NamedRecord -> ByteString -> CSV.Parser (Maybe a)
+(.:?) r field = r .: field <|> pure Nothing
aesonOptions prefix =
defaultOptions { fieldLabelModifier = fieldModifier (T.length prefix) }
@@ -78,29 +90,54 @@ swaggerOptions prefix =
c:rest -> toLower c : rest
"" -> ""
-newtype Time = Time { toSeconds :: Int }
- deriving newtype (ToJSON, FromJSON)
+-- | Times in GTFS are a weird beast, lacking any kinda of timezone information
+-- whatsoever, but are given in the timezone of the transport agency, and
+-- potentially displayed in a different timezone depending on the station they
+-- apply to.
+data Time = Time { timeSeconds :: Int, timeTZseries :: TimeZoneSeries, timeTZname :: Text }
deriving (Generic)
-instance CSV.FromField Time where
+instance ToJSON Time where
+ toJSON (Time seconds _ tzname) =
+ A.object [ "seconds" A..= seconds, "timezone" A..= tzname ]
+
+-- | converts a value of Time to seconds since midnight in UTC. Itself needs
+-- a UTCTime to resolve timezone changes, and the timezone info contained in
+-- the GTFS agency
+toSeconds :: Time -> TimeZoneSeries -> UTCTime -> Int
+toSeconds (Time seconds _ _) tzseries reftime =
+ seconds - timeZoneMinutes timezone * 60
+ where timezone = timeZoneFromSeries tzseries reftime
+
+-- | Times in GTFS are given without timezone info, which is handled
+-- seperately (as an attribute of the stop / the agency). We attach that information
+-- back to the Time, this is just an intermediate step during parsing.
+newtype RawTime = RawTime { unRawTime :: TimeZoneSeries -> Text -> Time }
+ deriving (Generic)
+
+instance CSV.FromField RawTime where
parseField f = do
text :: String <- CSV.parseField f
let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text)
:: (String, String, String, [String])
case subs of
- [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss
+ [hh,mm,ss] -> pure $ RawTime $ Time (read hh * 3600 + read mm * 60 + read ss)
_ -> fail $ "encountered an invalid date: " <> text
instance Show Time where
- show (Time seconds) = ""
+ show (Time seconds _ _) = ""
+|pad (seconds `div` 3600)|+":"
+|pad ((seconds `mod` 3600) `div` 60)|+
if seconds `mod` 60 /= 0 then ":"+|pad (seconds `mod` 60)|+""
else ""
- where pad num =
+ where
+ pad num =
if length str < 2 then "0"<>str else str
where str = show num
+instance Show RawTime where
+ show raw = "[raw time value]" -- show (unRawTime raw 0)
+
instance CSV.FromField Day where
parseField f = do
text :: String <- CSV.parseField f
@@ -134,10 +171,11 @@ type ServiceID = Text
-- | This is what's called a Stop in GTFS
data Station = Station
- { stationId :: StationID
- , stationName :: Text
- , stationLat :: Float
- , stationLon :: Float
+ { stationId :: StationID
+ , stationName :: Text
+ , stationLat :: Double
+ , stationLon :: Double
+ , stationTimezone :: Maybe Text
} deriving (Show, Generic)
instance ToSchema Station where
@@ -147,21 +185,25 @@ instance FromJSON Station where
instance ToJSON Station where
toJSON = genericToJSON (aesonOptions "station")
+stationGeopos :: Station -> (Double, Double)
+stationGeopos Station{..} = (stationLat, stationLon)
-- | This is what's called a stop time in GTFS
data Stop (deep :: Depth) = Stop
{ stopTrip :: TripID
- , stopArrival :: Time
- , stopDeparture :: Time
+ , stopArrival :: Switch deep Time RawTime
+ , stopDeparture :: Switch deep Time RawTime
, stopStation :: Switch deep Station StationID
, stopSequence :: Int
} deriving Generic
-deriving instance Show (Stop 'Shallow)
+-- deriving instance Show (Stop 'Shallow)
deriving instance Show (Stop 'Deep)
-instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where
- parseJSON = genericParseJSON (aesonOptions "stop")
-instance ToJSON (Switch a Station StationID) => ToJSON (Stop a) where
+-- instance FromJSON (Switch a Station StationID) => FromJSON (Stop a) where
+-- parseJSON = genericParseJSON (aesonOptions "stop")
+instance (ToJSON (Switch a Station StationID)
+ , ToJSON (Switch a Time RawTime))
+ => ToJSON (Stop a) where
toJSON = genericToJSON (aesonOptions "stop")
instance ToSchema (Stop Deep)where
declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "stop")
@@ -203,7 +245,7 @@ instance ToJSON CalendarDate where
instance FromForm CalendarDate
data Trip (deep :: Depth) (shape :: Depth)= Trip
- { tripRoute :: Text
+ { tripRoute :: Switch deep (Route Deep) Text
, tripTripID :: TripID
, tripHeadsign :: Maybe Text
, tripShortName :: Maybe Text
@@ -222,9 +264,9 @@ tripForgetShape trip = trip { tripShape = shapeId (tripShape trip) }
deriving instance Show (Trip Shallow Shallow)
deriving instance Show (Trip Deep Deep)
deriving instance Show (Trip Deep Shallow)
-instance (FromJSON (Switch d Shape Text), FromJSON (Optional d (Vector (Stop d))), FromJSON (Switch s Shape Text)) => FromJSON (Trip d s) where
- parseJSON = genericParseJSON (aesonOptions "trip")
-instance (ToJSON (Switch d Shape Text), ToJSON (Optional d (Vector (Stop d))), ToJSON (Switch s Shape Text)) => ToJSON (Trip d s) where
+-- instance (FromJSON (Switch d Shape Text), FromJSON (Optional d (Vector (Stop d))), FromJSON (Switch s Shape Text), FromJSON (Switch d (Route 'Deep) Text)) => FromJSON (Trip d s) where
+-- parseJSON = genericParseJSON (aesonOptions "trip")
+instance (ToJSON (Switch d Shape Text), ToJSON (Optional d (Vector (Stop d))), ToJSON (Switch s Shape Text), ToJSON (Switch d (Route 'Deep) Text)) => ToJSON (Trip d s) where
toJSON = genericToJSON (aesonOptions "trip")
instance ToSchema (Trip Deep Deep) where
declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "trip")
@@ -254,12 +296,54 @@ instance ToJSON Shape where
instance ToSchema Shape where
declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "shape")
+data Agency depth = Agency
+ { agencyId :: Maybe Text
+ , agencyName :: Text
+ , agencyUrl :: Text
+ , agencyTimezone :: Switch depth TimeZoneSeries Text
+ , agencyLang :: Maybe Text
+ , agencyPhone :: Maybe Text
+ , agencyFareUrl :: Maybe Text
+ , agencyEmail :: Maybe Text
+ } deriving (Generic)
+
+deriving instance Show (Switch depth TimeZoneSeries Text) => Show (Agency depth)
+instance ToJSON (Agency Deep) where
+ toJSON = genericToJSON (aesonOptions "agency")
+instance ToSchema (Agency Deep) where
+ declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "agency")
+instance ToJSON TimeZoneSeries where
+ toJSON tz = "[timezoneseries thingie]"
+instance ToSchema TimeZoneSeries where
+ declareNamedSchema _ = declareNamedSchema (Proxy @String)
+
+data Route depth = Route
+ { routeId :: Text
+ , routeAgency :: Switch depth (Agency Deep) (Maybe Text)
+ , routeShortName :: Maybe Text
+ , routeLongName :: Maybe Text
+ , routeDescription :: Maybe Text
+ , routeType :: Int
+ , routeUrl :: Maybe Text
+ , routeColor :: Maybe Text
+ , routeTextColor :: Maybe Text
+ , routeSortOrder :: Maybe Int
+ , routeNetworkId :: Maybe Text
+ } deriving (Generic)
+
+deriving instance Show (Switch depth (Agency Deep) (Maybe Text)) => Show (Route depth)
+instance ToJSON (Route Deep) where
+ toJSON = genericToJSON (aesonOptions "route")
+instance ToSchema (Route Deep) where
+ declareNamedSchema = genericDeclareNamedSchema (swaggerOptions "route")
+
instance CSV.FromNamedRecord Station where
parseNamedRecord r = Station
<$> r .: "stop_id"
<*> r .: "stop_name"
<*> r .: "stop_lat"
<*> r .: "stop_lon"
+ <*> r .:? "stop_timezone"
instance CSV.FromNamedRecord (Stop 'Shallow) where
parseNamedRecord r = Stop
@@ -329,6 +413,33 @@ instance CSV.FromNamedRecord (Trip Shallow Shallow) where
<*> r .: "shape_id"
<*> pure ()
+instance CSV.FromNamedRecord (Agency Shallow) where
+ parseNamedRecord r = Agency
+ <$> r .:? "agency_id"
+ <*> r .: "agency_name"
+ <*> r .: "agency_url"
+ <*> r .: "agency_timezone"
+ <*> r .:? "agency_lang"
+ <*> r .:? "agency_phone"
+ <*> r .:? "agency_fare_url"
+ <*> r .:? "agency_email"
+
+instance CSV.FromNamedRecord (Route Shallow) where
+ parseNamedRecord r = Route
+ <$> r .: "route_id"
+ <*> r .:? "agency_id"
+ <*> r .:? "route_short_name"
+ <*> r .:? "route_long_name"
+ <*> r .:? "route_desc"
+ <*> r .: "route_type"
+ <*> r .:? "route_url"
+ <*> r .:? "route_color"
+ <*> r .:? "route_text_color"
+ <*> r .:? "route_sort_order"
+ <*> r .:? "network_id"
+
+
+
data RawGTFS = RawGTFS
{ rawStations :: Vector Station
, rawStops :: Vector (Stop Shallow)
@@ -336,6 +447,8 @@ data RawGTFS = RawGTFS
, rawCalendar :: Maybe (Vector Calendar)
, rawCalendarDates :: Maybe (Vector CalendarDate)
, rawShapePoints :: Maybe (Vector ShapePoint)
+ , rawAgencies :: Vector (Agency Shallow)
+ , rawRoutes :: Vector (Route Shallow)
}
@@ -345,11 +458,11 @@ data GTFS = GTFS
, calendar :: Map DayOfWeek (Vector Calendar)
, calendarDates :: Map Day (Vector CalendarDate)
, shapes :: Map Text Shape
-
- , fancyCalendar :: Day -> (Vector ServiceID, Vector (Trip Deep Deep))
- -- ^ a more "fancy" encoding of the calendar?
- } -- deriving Show
-
+ , agencies :: Vector (Agency Deep)
+ -- ^ not a Map AgencyID Agency since if there's only one agency, it may lack an ID
+ , routes :: Map Text (Route Deep)
+ , tzseries :: TimeZoneSeries
+ }
loadRawGtfs :: FilePath -> IO RawGTFS
@@ -362,6 +475,8 @@ loadRawGtfs path = do
<*> decodeTable "calendar.txt" zip
<*> decodeTable "calendar_dates.txt" zip
<*> decodeTable "shapes.txt" zip
+ <*> decodeTable' "agency.txt" zip
+ <*> decodeTable' "routes.txt" zip
where
decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a))
decodeTable path zip =
@@ -383,15 +498,17 @@ loadGtfs path = do
V.foldr' sortShapePoint mempty
$ V.modify (V.sortBy (compare `on` shapePtSequence))
(fromMaybe mempty rawShapePoints)
- stops' <- V.mapM (pushStop rawStations) rawStops
- trips' <- V.mapM (pushTrip stops' shapes) rawTrips
+ -- all agencies must have the same timezone, so just take the first's
+ let tzname = agencyTimezone $ V.head rawAgencies
+ tzseries <- getTimeZoneSeriesFromOlsonFile (T.unpack $ "/etc/zoneinfo/"<>tzname)
+ let agencies' = fmap (\a -> a { agencyTimezone = tzseries }) rawAgencies
+ routes' <- V.mapM (\raw -> pushRoute agencies' raw) rawRoutes
+ <&> mapFromVector routeId
+ stops' <- V.mapM (pushStop tzseries tzname rawStations) rawStops
+ trips' <- V.mapM (pushTrip routes' stops' shapes) rawTrips
pure $ GTFS
- { stations =
- M.fromList $ (\station -> (stationId station, station))
- <$> V.toList rawStations
- , trips =
- M.fromList $ (\trip -> (tripTripID trip, trip))
- <$> V.toList trips'
+ { stations = mapFromVector stationId rawStations
+ , trips = mapFromVector tripTripID trips'
, calendar =
fmap V.fromList
$ M.fromListWith (<>)
@@ -402,8 +519,13 @@ loadGtfs path = do
$ M.fromListWith (<>) $ (\cd -> (caldateDate cd, [cd]))
<$> V.toList (fromMaybe mempty rawCalendarDates)
, shapes
+ , agencies = agencies'
+ , routes = routes'
+ , tzseries
}
where
+ mapFromVector :: Ord k => (a -> k) -> Vector a -> Map k a
+ mapFromVector by v = M.fromList $ fmap (\a -> (by a,a)) $ V.toList v
weekdays Calendar{..} =
[Monday | calMonday]
<> [Tuesday | calTuesday]
@@ -412,23 +534,44 @@ loadGtfs path = do
<> [Friday | calFriday]
<> [Saturday | calSaturday]
<> [Sunday | calSunday]
- pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep)
- pushStop stations stop = do
+ pushStop :: TimeZoneSeries -> Text -> Vector Station -> Stop Shallow -> IO (Stop Deep)
+ pushStop tzseries tzname stations stop = do
station <- case tableLookup stationId (stopStation stop) stations of
Just a -> pure a
- Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined."
- pure $ stop { stopStation = station }
- pushTrip :: Vector (Stop Deep) -> Map Text Shape -> Trip Shallow Shallow -> IO (Trip Deep Deep)
- pushTrip stops shapes trip = if V.length alongRoute < 2
+ Nothing -> fail $ "station with id "+|stopStation stop|+" is mentioned but not defined."
+ (tzseries', tzname') <- case stationTimezone station of
+ Nothing -> pure (tzseries, tzname)
+ Just tzname -> do
+ tzseries <- getTimeZoneSeriesFromOlsonFile (T.unpack $ "/etc/zoneinfo/"<>tzname)
+ pure (tzseries, tzname)
+ pure $ stop { stopStation = station
+ , stopDeparture = unRawTime (stopDeparture stop) tzseries tzname
+ , stopArrival = unRawTime (stopArrival stop) tzseries tzname }
+ pushTrip :: Map Text (Route Deep) -> Vector (Stop Deep) -> Map Text Shape -> Trip Shallow Shallow -> IO (Trip Deep Deep)
+ pushTrip routes stops shapes trip = if V.length alongRoute < 2
then fail $ "trip with id "+|tripTripID trip|+" has no stops"
else do
- a <- case M.lookup (tripShape trip) shapes of
+ shape <- case M.lookup (tripShape trip) shapes of
Nothing -> fail $ "trip with id "+|tripTripID trip|+" mentions a shape that does not exist."
- Just a -> pure a
- pure $ trip { tripStops = alongRoute, tripShape = a }
+ Just shape -> pure shape
+ route <- case M.lookup (tripRoute trip) routes of
+ Nothing -> fail $ "trip with id "+|tripTripID trip|+" specifies a route_id which does not exist."
+ Just route -> pure route
+ pure $ trip { tripStops = alongRoute, tripShape = shape, tripRoute = route}
where alongRoute =
V.modify (V.sortBy (compare `on` stopSequence))
$ V.filter (\s -> stopTrip s == tripTripID trip) stops
+ pushRoute :: Vector (Agency Deep) -> Route Shallow -> IO (Route Deep)
+ pushRoute agencies route = case routeAgency route of
+ Nothing -> do
+ when (V.length agencies /= 1)
+ $ fail $ "route "+|routeId route|+" has no agency_id specified, but there are multiple agencies specified in agency.txt"
+ pure $ route {routeAgency = V.head agencies}
+ Just specifiedId -> case V.headM $ V.filter (\a -> agencyId a == Just specifiedId) agencies of
+ Nothing -> fail $ "route "+|routeId route|+" specifies an agency_id which does not exist."
+ Just agency -> pure $ route { routeAgency = agency }
+
+
sortShapePoint :: ShapePoint -> Map Text Shape -> Map Text Shape
sortShapePoint ShapePoint{..} shapes = M.alter appendPoint shapePtId shapes
where