From 35bb210c9aced65795ba09a5ed30e9d28a89dc3b Mon Sep 17 00:00:00 2001 From: stuebinm Date: Sun, 5 Jun 2022 00:03:43 +0200 Subject: initial hacking --- .gitignore | 1 + CHANGELOG.md | 5 + app/Main.hs | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++ haskell-gtfs.cabal | 44 ++++++++ hie.yaml | 4 + shell.nix | 5 + 6 files changed, 367 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 app/Main.hs create mode 100644 haskell-gtfs.cabal create mode 100644 hie.yaml create mode 100644 shell.nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b5e3679 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +dist-newstyle/* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..1df15d0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for haskell-gtfs + +## 0.1.0.0 -- YYYY-mm-dd + +* First version. Released on an unsuspecting world. diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000..31c9882 --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,308 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE StandaloneKindSignatures #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE RecordWildCards #-} + + +module Main where + + +import qualified Data.ByteString.Lazy as LB +import qualified Data.ByteString as BS +import qualified Data.Csv as CSV +import Data.Csv ((.:)) +import qualified Codec.Archive.Zip as Zip +import qualified Data.Vector as V +import Data.Vector (Vector) +import Text.Regex.TDFA ( (=~) ) +import Data.Text (Text) +import Fmt ( (+|), (|+) ) +import Data.Kind (Type) +import Data.Maybe (fromMaybe, fromJust) +import Data.Functor ((<&>)) +import qualified Data.Time.Calendar.OrdinalDate as Day +import Data.Time.Calendar.OrdinalDate (Day) +import Data.Time.Calendar.MonthDay (monthAndDayToDayOfYearValid) +import Data.Time (getCurrentTime, UTCTime (utctDay), dayOfWeek) +import Data.Time.Calendar.WeekDate (DayOfWeek(..)) + + +newtype Time = Time { toSeconds :: Int } + +instance CSV.FromField Time where + parseField f = do + text :: String <- CSV.parseField f + let (_,_,_,subs) = text =~ ("([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?)" :: Text) + :: (String, String, String, [String]) + case subs of + [hh,mm,ss] -> pure $ Time $ read hh * 3600 + read mm * 60 + read ss + _ -> fail $ "encountered an invalid date: " <> text + +instance Show Time where + show (Time seconds) = "" + +|seconds `div` 3600|+":" + +|(seconds `mod` 3600) `div` 60|+":" + +|seconds `mod` 60|+"" + +instance CSV.FromField Day where + parseField f = do + text :: String <- CSV.parseField f + let (_,_,_,subs) = text =~ ("([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])" :: Text) + :: (String, String, String, [String]) + case subs of + [yyyy,mm,dd] -> do + let Just dayOfYear = monthAndDayToDayOfYearValid (Day.isLeapYear (read yyyy)) (read mm) (read dd) + pure $ Day.fromOrdinalDate (read yyyy) dayOfYear + _ -> fail $ "invalid date encountered: " <> show f + + + +data Depth = Shallow | Deep +type Switch :: Depth -> Type -> Type -> Type +type family Switch c a b where + Switch Deep a b = a + Switch Shallow a b = b + +type StationID = Text + +-- | This is what's called a Stop in GTFS +data Station = Station + { stationId :: StationID + , stationName :: Text + , stationLat :: Float + , stationLon :: Float + } deriving Show + +-- | This is what's called a stop time in GTFS +data Stop (deep :: Depth) = Stop + { stopTrip :: Int + , stopArrival :: Time + , stopDeparture :: Time + , stopStation:: Switch deep Station StationID + , stopSequence :: Int + } + +deriving instance Show (Stop 'Shallow) +deriving instance Show (Stop 'Deep) + + +data Calendar = Calendar + { calServiceId :: Text + , calMonday :: Bool + , calTuesday :: Bool + , calWednesday :: Bool + , calThursday :: Bool + , calFriday :: Bool + , calSaturday :: Bool + , calSunday :: Bool + , calStartDate :: Day + , calEndDate :: Day + } deriving Show + +data CalendarExceptionType = ServiceAdded | ServiceRemoved + deriving (Show, Eq) + +data CalendarDate = CalendarDate + { caldateServiceId :: Text + , caldateDate :: Day + , caldateExceptionType :: CalendarExceptionType + } deriving Show + +type TripID = Text +type ServiceID = Text + +data Trip = Trip + { tripRoute :: Text + , tripTripID :: TripID + , tripHeadsign :: Maybe Text + , tripShortName :: Maybe Text + , tripDirection :: Maybe Bool + -- NOTE: there's also block_id, which we're unlikely to ever need + , tripServiceId :: Text + -- , tripWheelchairAccessible :: Bool + -- , tripBikesAllowed :: Bool + , tripShapeId :: Text + } deriving Show + +-- | helper function to find things in Vectors of things +tableLookup :: Eq key => (a -> key) -> key -> Vector a -> Maybe a +tableLookup proj key = V.find (\a -> proj a == key) + +instance CSV.FromNamedRecord Station where + parseNamedRecord r = Station + <$> r .: "stop_id" + <*> r .: "stop_name" + <*> r .: "stop_lat" + <*> r .: "stop_lon" + +instance CSV.FromNamedRecord (Stop 'Shallow) where + parseNamedRecord r = Stop + <$> r .: "trip_id" + <*> r .: "arrival_time" + <*> r .: "departure_time" + <*> r .: "stop_id" + <*> r .: "stop_sequence" + +instance CSV.FromNamedRecord Calendar where + parseNamedRecord r = Calendar + <$> r .: "service_id" + <*> intAsBool' r "monday" + <*> intAsBool' r "tuesday" + <*> intAsBool' r "wednesday" + <*> intAsBool' r "thursday" + <*> intAsBool' r "friday" + <*> intAsBool' r "saturday" + <*> intAsBool' r "sunday" + <*> r .: "start_date" + <*> r .: "end_date" + +intAsBool :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser (Maybe Bool) +intAsBool r field = do + int <- r .: field + pure $ case int :: Int of + 1 -> Just True + 0 -> Just False + _ -> Nothing + +intAsBool' :: CSV.NamedRecord -> BS.ByteString -> CSV.Parser Bool +intAsBool' r field = intAsBool r field >>= maybe + (fail "unexpected value for a boolean.") + pure + + +instance CSV.FromNamedRecord CalendarDate where + parseNamedRecord r = CalendarDate + <$> r .: "service_id" + <*> r .: "date" + <*> do + int <- r .: "exception_type" + case int :: Int of + 1 -> pure ServiceAdded + 2 -> pure ServiceRemoved + _ -> fail $ "unexpected value in exception_type: "+|int|+"." + + +instance CSV.FromNamedRecord Trip where + parseNamedRecord r = Trip + <$> r .: "route_id" + <*> r .: "trip_id" + <*> r .: "trip_headsign" + <*> r .: "trip_short_name" + <*> intAsBool r "direction_id" + <*> r .: "service_id" + -- NOTE: these aren't booleans but triple-values + -- <*> intAsBool r "wheelchair_accessible" + -- <*> intAsBool r "bikes_allowed" + <*> r .: "shape_id" + +data GTFS (depth :: Depth) = GTFS + { stations :: Vector Station + , stops :: Vector (Stop depth) + , trips :: Vector Trip + , calendar :: Maybe (Vector Calendar) + , calendarDates :: Maybe (Vector CalendarDate) + } + +deriving instance Show (GTFS Shallow) +deriving instance Show (GTFS Deep) + +class Loadable depth where + loadGtfs :: FilePath -> IO (GTFS depth) + +instance Loadable Shallow where + loadGtfs path = do + zip <- Zip.toArchive <$> LB.readFile "./gtfs.zip" + GTFS <$> decodeTable' "stops.txt" zip + <*> decodeTable' "stop_times.txt" zip + <*> decodeTable' "trips.txt" zip + <*> decodeTable "calendar.txt" zip + <*> decodeTable "calendar_dates.txt" zip + where + decodeTable :: CSV.FromNamedRecord a => FilePath -> Zip.Archive -> IO (Maybe (Vector a)) + decodeTable path zip = do + case Zip.findEntryByPath path zip of + Nothing -> pure Nothing + Just csv -> case CSV.decodeByName (Zip.fromEntry csv) of + Left err -> error "blah" + Right (_,v :: a) -> pure (Just v) + decodeTable' path zip = + decodeTable path zip >>= \case + Nothing -> fail $ "required file "+|path|+" not found in gtfs.zip" + Just a -> pure a + +instance Loadable Deep where + loadGtfs path = do + shallow <- loadGtfs @Shallow path + stops' <- V.mapM (pushStop (stations shallow)) (stops shallow) + pure $ shallow { stops = stops' } + where + pushStop :: Vector Station -> Stop Shallow -> IO (Stop Deep) + pushStop stations stop = do + station <- case tableLookup stationId (stopStation stop) stations of + Just a -> pure a + Nothing -> fail $ "station with id "+|stopStation stop|+"is mentioned but not defined." + pure $ stop { stopStation = station } + + + +servicesOnDay :: GTFS Deep -> Day -> Vector ServiceID +servicesOnDay GTFS{..} day = + fmap caldateServiceId added <> V.filter notCancelled regular + where (added,removed) = case calendarDates of + Nothing -> (mempty,mempty) + Just exs -> + V.partition (\cd -> caldateExceptionType cd == ServiceAdded) + $ V.filter (\cd -> caldateDate cd == day) exs + regular = case calendar of + Nothing -> mempty + Just cs -> V.mapMaybe (\cal -> if isRunning cal then Just (calServiceId cal) else Nothing) cs + where isRunning Calendar{..} = + day >= calStartDate && + day <= calEndDate && + case weekday of + Monday -> calMonday + Tuesday -> calTuesday + Wednesday -> calWednesday + Thursday -> calThursday + Friday -> calFriday + Saturday -> calSaturday + Sunday -> calSunday + weekday = dayOfWeek day + notCancelled serviceID = + null (tableLookup caldateServiceId serviceID removed) + +tripsOfService :: GTFS Deep -> ServiceID -> Vector Trip +tripsOfService GTFS{..} serviceId = + V.filter (\trip -> tripServiceId trip == serviceId ) trips + + +main :: IO () +main = do + gtfs <- loadGtfs @Deep "./gtfs.zip" + + -- today <- getCurrentTime <&> utctDay + -- print (calendar gtfs) + let today = Day.fromOrdinalDate 2022 (fromJust $ monthAndDayToDayOfYearValid False 6 6) + print today + + putStrLn "trips today:" + print (fmap (tripsOfService gtfs) (servicesOnDay gtfs today)) + + +{- +TODO: +there should be a basic API allowing the questions: + - what are the next trips leaving from $station? (or $geolocation?) + - all stops of a given tripID + +then the "ingress" API: + - train ping (location, estimated delay, etc.) + - cancel trip + - add trip? + +-} diff --git a/haskell-gtfs.cabal b/haskell-gtfs.cabal new file mode 100644 index 0000000..642c5d9 --- /dev/null +++ b/haskell-gtfs.cabal @@ -0,0 +1,44 @@ +cabal-version: 2.4 +name: haskell-gtfs +version: 0.1.0.0 + +-- A short (one-line) description of the package. +-- synopsis: + +-- A longer description of the package. +-- description: + +-- A URL where users can report bugs. +-- bug-reports: + +-- The license under which the package is released. +-- license: +author: stuebinm +maintainer: stuebinm@disroot.org + +-- A copyright notice. +-- copyright: +-- category: +extra-source-files: CHANGELOG.md + +executable haskell-gtfs + main-is: Main.hs + + -- Modules included in this executable, other than Main. + -- other-modules: + + -- LANGUAGE extensions used by modules in this package. + -- other-extensions: + build-depends: base ^>=4.14.3.0 + , zip-archive >= 0.4.2.1 + , cassava >= 0.5.2.0 + , bytestring >= 0.10.10.0 + , vector >= 0.12.3.1 + , regex-tdfa >= 1.3.1.2 + , text >= 2.0 + , fmt >= 0.6.3.0 + , time >= 1.12.2 + hs-source-dirs: app + default-language: Haskell2010 + default-extensions: OverloadedStrings + , ScopedTypeVariables diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 0000000..7cca5eb --- /dev/null +++ b/hie.yaml @@ -0,0 +1,4 @@ +cradle: + cabal: + - path: "app/Main.hs" + component: "haskell-gtfs:exe:haskell-gtfs" diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..e010081 --- /dev/null +++ b/shell.nix @@ -0,0 +1,5 @@ +{ pkgs ? import {} }: + +pkgs.mkShell { + buildInputs = [ pkgs.zlib pkgs.openssh ]; +} -- cgit v1.2.3